diff --git a/.github/release-settings.xml b/.github/release-settings.xml new file mode 100644 index 0000000..be56a53 --- /dev/null +++ b/.github/release-settings.xml @@ -0,0 +1,20 @@ + + + + eu.maveniverse.maven.plugins + + + + + sonatype-central-portal + ${env.MAVEN_USER} + ${env.MAVEN_PASSWORD} + + sonatype-cp + njord:template:release-sca + + + + + diff --git a/.github/workflows/basic.yml b/.github/workflows/basic.yml new file mode 100644 index 0000000..7f12ed7 --- /dev/null +++ b/.github/workflows/basic.yml @@ -0,0 +1,24 @@ +name: Java CI + +on: [push, pull_request] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + java: [ 21 ] + steps: + - uses: actions/checkout@v4.1.1 + - name: Set up JDK + uses: actions/setup-java@v4.1.0 + with: + java-version: ${{ matrix.java }} + distribution: 'adopt' + - name: Build and test with Maven + run: mvn package + - name: Build example + run: javac -cp target/classes/:. example.java + - name: Run example + run: java -cp target/classes/:. example diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..d6ad167 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,90 @@ +name: Release + +on: + workflow_dispatch: + inputs: + releaseVersion: + description: "Release version, e.g. 0.3.6 (optional — auto-detected from the current POM)" + required: false + +jobs: + release: + runs-on: ubuntu-latest + permissions: + contents: write # to automatically create tags + + steps: + - name: Validate release version + if: ${{ github.event.inputs.releaseVersion != '' }} + run: | + RELEASE=${{ github.event.inputs.releaseVersion }} + if [[ ! 
$RELEASE =~ ^[0-9]+\.[0-9]+\.[0-9]+(-SNAPSHOT)?$ ]]; then + echo "Error: releaseVersion '$RELEASE' is not in the correct format x.y.z or x.y.z-SNAPSHOT" + exit 1 + fi + + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: master + + - name: Set up Java + uses: actions/setup-java@v4 + with: + java-version: '21' + distribution: 'adopt' + gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }} + gpg-passphrase: MAVEN_GPG_PASSPHRASE + + - name: Configure git + run: | + git config user.email "actions@github.com" + git config user.name "GitHub Actions" + + - name: Prepare Release + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} + run: | + MVN_ARGS="" + if [ -n "${{ github.event.inputs.releaseVersion }}" ]; then + MVN_ARGS="$MVN_ARGS -DreleaseVersion=${{ github.event.inputs.releaseVersion }}" + fi + mvn -B release:prepare $MVN_ARGS + + - name: Check release.properties + run: | + if [ ! -f release.properties ]; then + echo "release.properties not found" + exit 1 + fi + echo "Contents of release.properties:" + cat release.properties + + - name: Determine release version + id: version + run: | + export TAG=$(grep 'scm.tag=' release.properties | cut -d'=' -f2) + export VERSION=${TAG#JavaFastPFOR-} + + echo "released_tag=${TAG}" >> $GITHUB_OUTPUT + echo "released_version=${VERSION}" >> $GITHUB_OUTPUT + + echo "Releasing tag: ${TAG}" + echo "Releasing version: ${VERSION}" + + - name: Release + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} + MAVEN_GPG_KEY: ${{ secrets.GPG_PRIVATE_KEY }} + MAVEN_USER: ${{ secrets.MAVEN_USER }} + MAVEN_PASSWORD: ${{ secrets.MAVEN_PASSWORD }} + run: | + mvn -B release:perform -Darguments="-DskipTests -DaltDeploymentRepository=id::default::njord: -Dnjord.autoPublish=true -Dnjord.publishingType=automatic" -s .github/release-settings.xml + + - name: Create GitHub Release + run: gh release create "${{ 
steps.version.outputs.released_tag }}" --generate-notes --title "Version ${{ steps.version.outputs.released_version }}" + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 53960d2..5a78c84 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .classpath +.settings .project *.class *.csv @@ -6,3 +7,5 @@ tags target/ tmp/ /bin +.idea +*.iml diff --git a/README.md b/README.md index 000695c..0246789 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,10 @@ JavaFastPFOR: A simple integer compression library in Java ========================================================== - [![][maven img]][maven] [![][license img]][license] [![docs-badge][]][docs] -[![Code Quality: Cpp](https://img.shields.io/lgtm/grade/java/g/lemire/JavaFastPFOR.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/lemire/JavaFastPFOR/context:java) +[![](https://jitpack.io/v/fast-pack/JavaFastPFor.svg)](https://jitpack.io/#fast-pack/JavaFastPFor) [![][license img]][license] [![docs-badge][]][docs] +[![Java CI](https://github.com/lemire/JavaFastPFOR/actions/workflows/basic.yml/badge.svg)](https://github.com/lemire/JavaFastPFOR/actions/workflows/basic.yml) -License -------- - -This code is released under the -Apache License Version 2.0 http://www.apache.org/licenses/. - What does this do? ------------------ @@ -49,13 +43,50 @@ as well as in GMAP and GSNAP (http://research-pub.gene.com/gmap/). Usage ------ -Really simple usage: ```java - IntegratedIntCompressor iic = new IntegratedIntCompressor(); - int[] data = ... 
; // to be compressed - int[] compressed = iic.compress(data); // compressed array - int[] recov = iic.uncompress(compressed); // equals to data +package org.example; + +import me.lemire.integercompression.FastPFOR128; +import me.lemire.integercompression.IntWrapper; + +import java.util.Arrays; + +public class Main { + public static void main(String[] args) { + FastPFOR128 fastpfor = new FastPFOR128(); + + int N = 9984; + int[] data = new int[N]; + for (var i = 0; i < N; i += 150) { + data[i] = i; + } + + int[] compressedoutput1 = new int[N + 1024]; + + IntWrapper inputoffset1 = new IntWrapper(0); + IntWrapper outputoffset1 = new IntWrapper(0); + + fastpfor.compress(data, inputoffset1, N, compressedoutput1, outputoffset1); + int compressedsize1 = outputoffset1.get(); + + int[] recovered1 = new int[N]; + inputoffset1 = new IntWrapper(0); + outputoffset1 = new IntWrapper(0); + fastpfor.uncompress(compressedoutput1, outputoffset1, compressedsize1, recovered1, inputoffset1); + + // quick verification: count mismatches + int mismatches = 0; + for (int i = 0; i < N; i++) { + if (data[i] != recovered1[i]) mismatches++; + } + + System.out.println("N=" + N + " compressedSizeWords=" + compressedsize1 + " mismatches=" + mismatches); + System.out.println("first 20 original: " + Arrays.toString(Arrays.copyOf(data, 20))); + System.out.println("first 20 recovered: " + Arrays.toString(Arrays.copyOf(recovered1, 20))); + } +} + ``` For more examples, see example.java or the examples folder. @@ -67,38 +98,83 @@ in sorted orders and use differential coding (they compress deltas). They can be found in the package me.lemire.integercompression.differential. Most others do not. +The Java Team at Intel (R) introduced the vector implementation for FastPFOR +based on the Java Vector API that showed significant gains over the +non-vectorized implementation. For an example usage, see +examples/vector/Example.java. The feature requires JDK 19+ and is currently for +advanced users. 
-Maven central repository +JavaFastPFOR as a dependency ------------------------ -Using this code in your own project is easy with maven, just add -the following code in your pom.xml file: +JavaFastPFOR is available both on Maven Central and JitPack, so you can easily +include it in your project using either source. + +We have a demo project that uses JavaFastPFOR as a dependency (with both Maven and Gradle); see: + +https://github.com/fast-pack/JavaFastPFORDemo + +### Maven Central + +You can add JavaFastPFOR directly from Maven Central — no extra repository configuration needed: + +**Maven** ```xml - <dependencies> - <dependency> - <groupId>me.lemire.integercompression</groupId> - <artifactId>JavaFastPFOR</artifactId> - <version>[0.1,)</version> - </dependency> - </dependencies> + <dependency> + <groupId>me.lemire.integercompression</groupId> + <artifactId>JavaFastPFOR</artifactId> + <version>0.3.8</version> + </dependency> ``` -Naturally, you should replace "version" by the version -you desire. +**Gradle (Groovy)** +```groovy +dependencies { + implementation 'me.lemire.integercompression:JavaFastPFOR:0.3.8' +} +``` +### JitPack -You can also download JavaFastPFOR from the Maven central repository: -http://repo1.maven.org/maven2/me/lemire/integercompression/JavaFastPFOR/ +If you prefer or need to use JitPack, you can include the dependency like this: +**Maven** -Why? ----- +```xml + <repositories> + <repository> + <id>jitpack.io</id> + <url>https://jitpack.io</url> + </repository> + </repositories> + + <dependency> + <groupId>com.github.fast-pack</groupId> + <artifactId>JavaFastPFOR</artifactId> + <version>JavaFastPFOR-0.3.8</version> + </dependency> +``` + +**Gradle (Groovy)** + +```groovy +repositories { + mavenCentral() + maven { + url 'https://jitpack.io' + } +} + +dependencies { + implementation 'com.github.fast-pack:JavaFastPFOR:JavaFastPFOR-0.3.8' +} +``` + +Naturally, you should replace `0.3.8` with the version +you desire. -We found no library that implemented state-of-the-art integer coding techniques -such as Binary Packing, NewPFD, OptPFD, Variable Byte, Simple 9 and so on in Java. -We wrote one. Thread safety ---- @@ -111,19 +187,6 @@ Nevertheless, if you want to reuse codec instances, note that by convention, unless the documentation of a codec specify that it is not thread-safe, then it can be assumed to be thread-safe. 
-Authors -------- - -Main contributors -* Daniel Lemire, http://lemire.me/en/ -* Muraoka Taro, https://github.com/koron - -with contributions by -* the Terrier team (Matteo Catena, Craig Macdonald, Saúl Vargas and Iadh Ounis) -* Di Wu, http://www.facebook.com/diwu1989 -* Stefan Ackermann, https://github.com/Stivo -* Samit Roy, https://github.com/roysamit - How does it compare to the Kamikaze PForDelta library? ------------------------------------------------------ @@ -141,19 +204,16 @@ Reference: Requirements ------------ -A recent Java compiler. Java 7 or better is recommended. +Releases up to 0.1.12 require Java 7 or better. -Good instructions on installing Java 7 on Linux: +The current development versions assume JDK 21 or better. -http://forums.linuxmint.com/viewtopic.php?f=42&t=93052 How fast is it? --------------- -Compile the code and execute me.lemire.integercompression.benchmarktools.Benchmark. - -I recommend running all the benchmarks with the "-server" flag on a desktop machine. +Compile the code and execute `me.lemire.integercompression.benchmarktools.Benchmark`. Speed is always reported in millions of integers per second. @@ -161,11 +221,21 @@ Speed is always reported in millions of integers per second. For Maven users --------------- -mvn compile +``` +mvn compile mvn exec:java +``` + +You may run our examples as follows: + +``` +mvn package +javac -cp target/classes/:. example.java +java -cp target/classes/:. example +``` -For ant users +For ant users (legacy, currently untested) ------------- If you use Apache ant, please try this: @@ -182,6 +252,21 @@ API Documentation http://www.javadoc.io/doc/me.lemire.integercompression/JavaFastPFOR/ + +Citing this work +----------------- + +If you use JavaFastPFOR in your work, please consider citing the project. 
A recommended BibTeX entry is: + +```bibtex +@misc{lemire2025_javafastpfor, + author = {Daniel Lemire}, + title = {{JavaFastPFOR: A simple integer compression library in Java}}, + year = {2025}, + howpublished = {\url{https://github.com/fast-pack/JavaFastPFOR}}, +} +``` + Want to read more? ------------------ @@ -202,13 +287,14 @@ We wrote several research papers documenting many of the CODECs implemented here Ikhtear Sharif wrote his M.Sc. thesis on this library: Ikhtear Sharif, Performance Evaluation of Fast Integer Compression Techniques Over Tables, M.Sc. thesis, UNB 2013. -http://lemire.me/fr/documents/thesis/IkhtearThesis.pdf +https://unbscholar.lib.unb.ca/islandora/object/unbscholar%3A9399/datastream/PDF/view He also posted his slides online: http://www.slideshare.net/ikhtearSharif/ikhtear-defense Other recommended libraries ----------------------------- +* Fast integer compression in Go: https://github.com/ronanh/intcomp * Encoding: Integer Compression Libraries for Go https://github.com/zhenjl/encoding * CSharpFastPFOR: A C# integer compression library https://github.com/Genbox/CSharpFastPFOR * TurboPFor is a C library that offers lots of interesting optimizations and Java wrappers. Well worth checking! (Uses a GPL license.) https://github.com/powturbo/TurboPFor @@ -219,8 +305,6 @@ Funding This work was supported by NSERC grant number 26143. 
-[maven img]:https://maven-badges.herokuapp.com/maven-central/me.lemire.integercompression/JavaFastPFOR/badge.svg -[maven]:http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22me.lemire.integercompression%22%20 [license]:LICENSE [license img]:https://img.shields.io/badge/License-Apache%202-blue.svg diff --git a/benchmarkresults/benchmarkresults_haswell_18sept2014.txt b/benchmarkresults/benchmarkresults_haswell_18sept2014.txt index 43fa98b..a501d5d 100644 --- a/benchmarkresults/benchmarkresults_haswell_18sept2014.txt +++ b/benchmarkresults/benchmarkresults_haswell_18sept2014.txt @@ -1,7 +1,7 @@ # benchmark based on the ClusterData model from: -# Vo Ngoc Anh and Alistair Moffat. -# Index compression using 64-bit words. -# Softw. Pract. Exper.40, 2 (February 2010), 131-147. +# Vo Ngoc Anh and Alistair Moffat. +# Index compression using 64-bit words. +# Softw. Pract. Exper.40, 2 (February 2010), 131-147. # Results will be written into a CSV file: benchmark-20140918T011257.csv @@ -10,852 +10,852 @@ # generating random data... ok. 
# FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.56 246 1061 + 2.56 246 1061 # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 3.21 66 275 + 3.21 66 275 # IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.96 838 1679 + 2.96 838 1679 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1276 1805 + 32.00 1276 1805 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.00 490 509 + 8.00 490 509 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.00 582 774 + 8.00 582 774 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.96 765 1193 + 2.96 765 1193 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.88 139 896 + 2.88 139 896 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.90 166 905 + 2.90 166 905 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.88 139 898 + 2.88 139 898 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.87 25 938 + 2.87 25 938 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.90 29 960 + 2.90 29 960 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.87 25 882 + 2.87 25 882 # IntegratedFastPFOR + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.63 274 1015 + 2.63 274 1015 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 2.83 280 771 + 2.83 280 771 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.84 444 837 + 2.84 444 837 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per 
int, compress speed (mis), decompression speed (mis) - 3.27 498 652 + 3.27 498 652 # sparsity 2 # generating random data... # generating random data... ok. # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.51 244 1048 + 3.51 244 1048 # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 4.18 55 247 + 4.18 55 247 # IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.93 862 1611 + 3.93 862 1611 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1286 1816 + 32.00 1286 1816 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.01 486 508 + 8.01 486 508 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.01 575 763 + 8.01 575 763 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.93 774 1159 + 3.93 774 1159 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.83 118 865 + 3.83 118 865 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.86 141 875 + 3.86 141 875 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.83 118 867 + 3.83 118 867 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.82 18 881 + 3.82 18 881 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.85 22 887 + 3.85 22 887 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.82 18 838 + 3.82 18 838 # IntegratedFastPFOR + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.58 273 990 + 3.58 273 990 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 3.82 201 656 + 3.82 201 656 # XorBinaryPacking + VariableByte # 
bits per int, compress speed (mis), decompression speed (mis) - 3.90 442 819 + 3.90 442 819 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.50 494 640 + 4.50 494 640 # sparsity 3 # generating random data... # generating random data... ok. # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.28 244 1030 + 4.28 244 1030 # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 4.95 51 247 + 4.95 51 247 # IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.71 850 1577 + 4.71 850 1577 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1252 1769 + 32.00 1252 1769 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.02 478 504 + 8.02 478 504 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.02 573 762 + 8.02 573 762 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.71 770 1139 + 4.71 770 1139 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.60 107 850 + 4.60 107 850 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.63 127 863 + 4.63 127 863 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.60 107 853 + 4.60 107 853 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.59 14 865 + 4.59 14 865 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.62 18 882 + 4.62 18 882 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.59 14 844 + 4.59 14 844 # IntegratedFastPFOR + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.34 268 969 + 
4.34 268 969 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 4.72 170 610 + 4.72 170 610 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.68 434 783 + 4.68 434 783 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.33 472 624 + 5.33 472 624 # sparsity 4 # generating random data... # generating random data... ok. # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.03 239 1004 + 5.03 239 1004 # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 5.73 47 251 + 5.73 47 251 # IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.48 846 1556 + 5.48 846 1556 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1274 1799 + 32.00 1274 1799 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.12 439 486 + 8.12 439 486 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.12 537 715 + 8.12 537 715 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.48 769 1134 + 5.48 769 1134 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.36 95 817 + 5.36 95 817 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.39 115 838 + 5.39 115 838 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.36 96 827 + 5.36 96 827 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.34 12 842 + 5.34 12 842 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.37 16 871 + 5.37 16 871 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.34 
12 803 + 5.34 12 803 # IntegratedFastPFOR + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.09 268 963 + 5.09 268 963 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 5.57 150 587 + 5.57 150 587 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.47 432 800 + 5.47 432 800 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.16 491 635 + 6.16 491 635 # sparsity 5 # generating random data... # generating random data... ok. # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.00 236 999 + 6.00 236 999 # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 6.70 43 242 + 6.70 43 242 # IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.45 863 1584 + 6.45 863 1584 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1236 1792 + 32.00 1236 1792 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.40 369 452 + 8.40 369 452 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.40 486 617 + 8.40 486 617 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.45 777 1132 + 6.45 777 1132 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.33 86 808 + 6.33 86 808 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.36 103 828 + 6.36 103 828 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.33 86 813 + 6.33 86 813 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.31 9 825 + 6.31 9 825 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), 
decompression speed (mis) - 6.34 13 858 + 6.34 13 858 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.31 9 819 + 6.31 9 819 # IntegratedFastPFOR + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.06 265 945 + 6.06 265 945 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 6.65 139 546 + 6.65 139 546 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.45 442 804 + 6.45 442 804 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.18 493 635 + 7.18 493 635 # sparsity 6 # generating random data... # generating random data... ok. # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.76 238 998 + 6.76 238 998 # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 7.45 42 251 + 7.45 42 251 # IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.20 854 1525 + 7.20 854 1525 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1177 1663 + 32.00 1177 1663 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.10 259 362 + 9.10 259 362 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.10 380 450 + 9.10 380 450 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.20 718 1098 + 7.20 718 1098 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.08 79 786 + 7.08 79 786 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.11 95 821 + 7.11 95 821 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.08 81 814 + 7.08 81 814 # OptPFD + VariableByte # bits per 
int, compress speed (mis), decompression speed (mis) - 7.06 8 836 + 7.06 8 836 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.09 11 860 + 7.09 11 860 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.06 8 822 + 7.06 8 822 # IntegratedFastPFOR + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.81 268 962 + 6.81 268 962 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 7.56 129 509 + 7.56 129 509 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.19 433 789 + 7.19 433 789 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.93 491 632 + 7.93 491 632 # sparsity 7 # generating random data... # generating random data... ok. # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.05 236 985 + 8.05 236 985 # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 8.75 39 247 + 8.75 39 247 # IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.50 861 1526 + 8.50 861 1526 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1279 1788 + 32.00 1279 1788 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.11 190 305 + 10.11 190 305 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.11 311 355 + 10.11 311 355 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.50 753 1092 + 8.50 753 1092 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.37 71 792 + 8.37 71 792 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.40 83 804 + 8.40 83 804 # 
NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.37 72 805 + 8.37 72 805 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.35 7 808 + 8.35 7 808 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.38 10 835 + 8.38 10 835 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.35 7 796 + 8.35 7 796 # IntegratedFastPFOR + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.10 259 920 + 8.10 259 920 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 9.16 111 447 + 9.16 111 447 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.52 435 784 + 8.52 435 784 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.32 485 622 + 9.32 485 622 # sparsity 8 # generating random data... # generating random data... ok. 
# FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.73 234 972 + 8.73 234 972 # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 9.44 37 250 + 9.44 37 250 # IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.19 848 1493 + 9.19 848 1493 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1279 1858 + 32.00 1279 1858 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.04 167 307 + 11.04 167 307 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.04 309 353 + 11.04 309 353 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.19 751 1095 + 9.19 751 1095 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.06 67 770 + 9.06 67 770 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.09 78 781 + 9.09 78 781 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.06 68 792 + 9.06 68 792 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.03 6 795 + 9.03 6 795 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.07 9 824 + 9.07 9 824 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.03 6 787 + 9.03 6 787 # IntegratedFastPFOR + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.78 266 936 + 8.78 266 936 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 10.34 101 427 + 10.34 101 427 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.21 437 794 + 9.21 437 794 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, 
compress speed (mis), decompression speed (mis) - 10.01 488 626 + 10.01 488 626 # sparsity 9 # generating random data... # generating random data... ok. # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.75 234 980 + 9.75 234 980 # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 10.48 36 242 + 10.48 36 242 # IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.21 844 1474 + 10.21 844 1474 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1262 1795 + 32.00 1262 1795 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.18 145 300 + 12.18 145 300 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.18 302 340 + 12.18 302 340 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.21 761 1096 + 10.21 761 1096 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.08 63 786 + 10.08 63 786 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.12 72 752 + 10.12 72 752 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.08 63 783 + 10.08 63 783 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.05 6 787 + 10.05 6 787 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.09 8 798 + 10.09 8 798 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.05 6 779 + 10.05 6 779 # IntegratedFastPFOR + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.80 264 930 + 9.80 264 930 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 11.77 92 410 + 11.77 92 410 # XorBinaryPacking + VariableByte 
# bits per int, compress speed (mis), decompression speed (mis) - 10.23 438 789 + 10.23 438 789 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.05 486 624 + 11.05 486 624 # sparsity 10 # generating random data... # generating random data... ok. # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.73 235 979 + 10.73 235 979 # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 11.46 35 239 + 11.46 35 239 # IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.18 840 1456 + 11.18 840 1456 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1248 1746 + 32.00 1248 1746 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.14 135 312 + 13.14 135 312 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.14 309 354 + 13.14 309 354 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.18 761 1097 + 11.18 761 1097 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.09 59 802 + 11.09 59 802 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.13 69 814 + 11.13 69 814 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.09 59 771 + 11.09 59 771 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.04 5 783 + 11.04 5 783 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.08 8 816 + 11.08 8 816 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.04 5 776 + 11.04 5 776 # IntegratedFastPFOR + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) 
- 10.78 265 934 + 10.78 265 934 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 12.98 89 415 + 12.98 89 415 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.20 436 787 + 11.20 436 787 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.02 483 620 + 12.02 483 620 # sparsity 11 # generating random data... # generating random data... ok. # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.79 232 950 + 11.79 232 950 # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 12.68 34 256 + 12.68 34 256 # IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.23 842 1450 + 12.23 842 1450 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1292 1826 + 32.00 1292 1826 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 14.00 127 324 + 14.00 127 324 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 14.00 308 369 + 14.00 308 369 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.23 760 1092 + 12.23 760 1092 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.35 56 795 + 12.35 56 795 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.38 65 829 + 12.38 65 829 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.35 57 822 + 12.35 57 822 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.13 5 706 + 12.13 5 706 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.17 7 750 + 12.17 7 750 # OptPFDS16 + VariableByte # bits per int, compress speed 
(mis), decompression speed (mis) - 12.13 5 712 + 12.13 5 712 # IntegratedFastPFOR + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.83 261 919 + 11.83 261 919 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 14.17 85 401 + 14.17 85 401 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.25 436 781 + 12.25 436 781 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.08 489 623 + 13.08 489 623 # sparsity 12 # generating random data... # generating random data... ok. # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.70 226 932 + 12.70 226 932 # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 13.76 34 261 + 13.76 34 261 # IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.16 849 1453 + 13.16 849 1453 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1208 1804 + 32.00 1208 1804 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 14.84 117 307 + 14.84 117 307 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 14.84 260 352 + 14.84 260 352 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.16 762 1095 + 13.16 762 1095 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.46 56 899 + 13.46 56 899 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.48 63 915 + 13.48 63 915 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.46 56 897 + 13.46 56 897 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.20 5 681 + 13.20 5 681 # 
OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.24 7 735 + 13.24 7 735 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.20 5 699 + 13.20 5 699 # IntegratedFastPFOR + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.75 260 914 + 12.75 260 914 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 15.51 80 359 + 15.51 80 359 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.18 435 781 + 13.18 435 781 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 14.00 489 626 + 14.00 489 626 Results were written into a CSV file: benchmark-20140918T011257.csv diff --git a/benchmarkresults/benchmarkresults_icore7_10may2013.txt b/benchmarkresults/benchmarkresults_icore7_10may2013.txt index 5b776fb..d10579e 100644 --- a/benchmarkresults/benchmarkresults_icore7_10may2013.txt +++ b/benchmarkresults/benchmarkresults_icore7_10may2013.txt @@ -3,610 +3,610 @@ # generating random data... ok. 
# kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 3.34 51 262 + 3.34 51 262 # me.lemire.integercompression.IntegratedBinaryPacking+me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.09 639 1183 + 3.09 639 1183 # me.lemire.integercompression.JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1151 1468 + 32.00 1151 1468 # me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.00 199 299 + 8.00 199 299 # me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.00 148 339 + 8.00 148 339 # me.lemire.integercompression.BinaryPacking+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.09 613 920 + 3.09 613 920 # me.lemire.integercompression.NewPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.02 143 721 + 3.02 143 721 # me.lemire.integercompression.NewPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.05 164 705 + 3.05 164 705 # me.lemire.integercompression.OptPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.01 26 790 + 3.01 26 790 # me.lemire.integercompression.OptPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.03 30 816 + 3.03 30 816 # me.lemire.integercompression.FastPFOR+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.78 226 811 + 2.78 226 811 # me.lemire.integercompression.Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 2.97 236 604 + 2.97 236 604 # sparsity 2 # generating random data... # generating random data... 
ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 4.17 47 266 + 4.17 47 266 # me.lemire.integercompression.IntegratedBinaryPacking+me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.92 672 1261 + 3.92 672 1261 # me.lemire.integercompression.JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1218 1562 + 32.00 1218 1562 # me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.00 204 290 + 8.00 204 290 # me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.00 236 343 + 8.00 236 343 # me.lemire.integercompression.BinaryPacking+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.92 505 917 + 3.92 505 917 # me.lemire.integercompression.NewPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.82 127 698 + 3.82 127 698 # me.lemire.integercompression.NewPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.85 151 726 + 3.85 151 726 # me.lemire.integercompression.OptPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.81 18 752 + 3.81 18 752 # me.lemire.integercompression.OptPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.84 23 779 + 3.84 23 779 # me.lemire.integercompression.FastPFOR+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.56 228 828 + 3.56 228 828 # me.lemire.integercompression.Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 3.82 182 562 + 3.82 182 562 # sparsity 3 # generating random data... 
# generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 4.96 43 276 + 4.96 43 276 # me.lemire.integercompression.IntegratedBinaryPacking+me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.72 662 1187 + 4.72 662 1187 # me.lemire.integercompression.JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1215 1566 + 32.00 1215 1566 # me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.02 198 286 + 8.02 198 286 # me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.02 254 340 + 8.02 254 340 # me.lemire.integercompression.BinaryPacking+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.72 576 848 + 4.72 576 848 # me.lemire.integercompression.NewPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.61 111 654 + 4.61 111 654 # me.lemire.integercompression.NewPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.64 129 699 + 4.64 129 699 # me.lemire.integercompression.OptPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.60 14 732 + 4.60 14 732 # me.lemire.integercompression.OptPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.63 18 761 + 4.63 18 761 # me.lemire.integercompression.FastPFOR+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.36 226 813 + 4.36 226 813 # me.lemire.integercompression.Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 4.76 148 511 + 4.76 148 511 # sparsity 4 # generating random 
data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 5.97 39 270 + 5.97 39 270 # me.lemire.integercompression.IntegratedBinaryPacking+me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.72 656 1148 + 5.72 656 1148 # me.lemire.integercompression.JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1212 1555 + 32.00 1212 1555 # me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.09 206 287 + 8.09 206 287 # me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.09 213 334 + 8.09 213 334 # me.lemire.integercompression.BinaryPacking+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.72 626 891 + 5.72 626 891 # me.lemire.integercompression.NewPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.60 105 672 + 5.60 105 672 # me.lemire.integercompression.NewPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.63 121 701 + 5.63 121 701 # me.lemire.integercompression.OptPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.58 10 667 + 5.58 10 667 # me.lemire.integercompression.OptPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.62 14 736 + 5.62 14 736 # me.lemire.integercompression.FastPFOR+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.33 226 805 + 5.33 226 805 # me.lemire.integercompression.Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 5.86 123 464 + 5.86 123 464 # sparsity 5 # generating 
random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 6.49 39 262 + 6.49 39 262 # me.lemire.integercompression.IntegratedBinaryPacking+me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.25 659 1121 + 6.25 659 1121 # me.lemire.integercompression.JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1154 1168 + 32.00 1154 1168 # me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.44 192 265 + 8.44 192 265 # me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.44 240 297 + 8.44 240 297 # me.lemire.integercompression.BinaryPacking+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.25 631 907 + 6.25 631 907 # me.lemire.integercompression.NewPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.13 101 685 + 6.13 101 685 # me.lemire.integercompression.NewPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.16 116 714 + 6.16 116 714 # me.lemire.integercompression.OptPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.11 9 708 + 6.11 9 708 # me.lemire.integercompression.OptPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.14 13 741 + 6.14 13 741 # me.lemire.integercompression.FastPFOR+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.86 225 806 + 5.86 225 806 # me.lemire.integercompression.Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 6.44 120 442 + 6.44 120 442 # sparsity 6 # generating 
random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 7.64 35 269 + 7.64 35 269 # me.lemire.integercompression.IntegratedBinaryPacking+me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.39 654 1111 + 7.39 654 1111 # me.lemire.integercompression.JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1207 1553 + 32.00 1207 1553 # me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.06 185 225 + 9.06 185 225 # me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.06 166 248 + 9.06 166 248 # me.lemire.integercompression.BinaryPacking+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.39 620 888 + 7.39 620 888 # me.lemire.integercompression.NewPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.26 91 679 + 7.26 91 679 # me.lemire.integercompression.NewPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.30 104 704 + 7.30 104 704 # me.lemire.integercompression.OptPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.24 7 704 + 7.24 7 704 # me.lemire.integercompression.OptPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.28 10 735 + 7.28 10 735 # me.lemire.integercompression.FastPFOR+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.00 221 792 + 7.00 221 792 # me.lemire.integercompression.Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 7.76 106 393 + 7.76 106 393 # sparsity 7 # generating 
random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 8.66 33 266 + 8.66 33 266 # me.lemire.integercompression.IntegratedBinaryPacking+me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.41 675 1165 + 8.41 675 1165 # me.lemire.integercompression.JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1210 1553 + 32.00 1210 1553 # me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.10 154 194 + 10.10 154 194 # me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.10 176 207 + 10.10 176 207 # me.lemire.integercompression.BinaryPacking+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.41 628 896 + 8.41 628 896 # me.lemire.integercompression.NewPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.27 84 643 + 8.27 84 643 # me.lemire.integercompression.NewPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.31 95 685 + 8.31 95 685 # me.lemire.integercompression.OptPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.25 6 693 + 8.25 6 693 # me.lemire.integercompression.OptPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.29 9 723 + 8.29 9 723 # me.lemire.integercompression.FastPFOR+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.00 215 773 + 8.00 215 773 # me.lemire.integercompression.Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 9.10 94 357 + 9.10 94 357 # sparsity 8 # generating 
random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 9.52 32 241 + 9.52 32 241 # me.lemire.integercompression.IntegratedBinaryPacking+me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.26 692 1194 + 9.26 692 1194 # me.lemire.integercompression.JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1208 1525 + 32.00 1208 1525 # me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.14 138 178 + 11.14 138 178 # me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.14 187 190 + 11.14 187 190 # me.lemire.integercompression.BinaryPacking+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.26 647 893 + 9.26 647 893 # me.lemire.integercompression.NewPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.14 79 655 + 9.14 79 655 # me.lemire.integercompression.NewPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.18 88 684 + 9.18 88 684 # me.lemire.integercompression.OptPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.11 6 680 + 9.11 6 680 # me.lemire.integercompression.OptPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.15 8 712 + 9.15 8 712 # me.lemire.integercompression.FastPFOR+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.86 220 778 + 8.86 220 778 # me.lemire.integercompression.Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 10.39 86 330 + 10.39 86 330 # sparsity 9 # generating 
random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 10.46 31 253 + 10.46 31 253 # me.lemire.integercompression.IntegratedBinaryPacking+me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.19 661 1122 + 10.19 661 1122 # me.lemire.integercompression.JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1210 1546 + 32.00 1210 1546 # me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.27 126 173 + 12.27 126 173 # me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.27 155 181 + 12.27 155 181 # me.lemire.integercompression.BinaryPacking+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.19 617 886 + 10.19 617 886 # me.lemire.integercompression.NewPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.07 73 634 + 10.07 73 634 # me.lemire.integercompression.NewPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.11 82 669 + 10.11 82 669 # me.lemire.integercompression.OptPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.04 5 663 + 10.04 5 663 # me.lemire.integercompression.OptPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.08 7 700 + 10.08 7 700 # me.lemire.integercompression.FastPFOR+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.79 215 757 + 9.79 215 757 # me.lemire.integercompression.Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 11.79 78 325 + 11.79 78 325 # sparsity 10 
# generating random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 11.13 30 243 + 11.13 30 243 # me.lemire.integercompression.IntegratedBinaryPacking+me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.83 628 1028 + 10.83 628 1028 # me.lemire.integercompression.JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1167 1498 + 32.00 1167 1498 # me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.71 152 179 + 12.71 152 179 # me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.71 151 187 + 12.71 151 187 # me.lemire.integercompression.BinaryPacking+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.83 389 820 + 10.83 389 820 # me.lemire.integercompression.NewPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.76 72 638 + 10.76 72 638 # me.lemire.integercompression.NewPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.79 79 683 + 10.79 79 683 # me.lemire.integercompression.OptPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.69 5 655 + 10.69 5 655 # me.lemire.integercompression.OptPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.73 7 682 + 10.73 7 682 # me.lemire.integercompression.FastPFOR+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.42 219 767 + 10.42 219 767 # me.lemire.integercompression.Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 12.49 77 332 + 12.49 77 332 
# sparsity 11 # generating random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 12.38 29 254 + 12.38 29 254 # me.lemire.integercompression.IntegratedBinaryPacking+me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.01 660 1112 + 12.01 660 1112 # me.lemire.integercompression.JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1142 1445 + 32.00 1142 1445 # me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.87 143 172 + 13.87 143 172 # me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.87 144 178 + 13.87 144 178 # me.lemire.integercompression.BinaryPacking+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.01 582 830 + 12.01 582 830 # me.lemire.integercompression.NewPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.04 64 647 + 12.04 64 647 # me.lemire.integercompression.NewPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.08 69 649 + 12.08 69 649 # me.lemire.integercompression.OptPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.91 4 637 + 11.91 4 637 # me.lemire.integercompression.OptPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.95 7 660 + 11.95 7 660 # me.lemire.integercompression.FastPFOR+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.61 217 766 + 11.61 217 766 # me.lemire.integercompression.Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 13.96 73 313 
+ 13.96 73 313 # sparsity 12 # generating random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 13.57 29 265 + 13.57 29 265 # me.lemire.integercompression.IntegratedBinaryPacking+me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.01 611 1012 + 13.01 611 1012 # me.lemire.integercompression.JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1215 1565 + 32.00 1215 1565 # me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 14.73 121 160 + 14.73 121 160 # me.lemire.integercompression.IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 14.73 131 166 + 14.73 131 166 # me.lemire.integercompression.BinaryPacking+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.01 603 832 + 13.01 603 832 # me.lemire.integercompression.NewPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.26 68 737 + 13.26 68 737 # me.lemire.integercompression.NewPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.29 72 761 + 13.29 72 761 # me.lemire.integercompression.OptPFD+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.06 4 591 + 13.06 4 591 # me.lemire.integercompression.OptPFDS9+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.10 6 625 + 13.10 6 625 # me.lemire.integercompression.FastPFOR+me.lemire.integercompression.VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.61 213 755 + 12.61 213 755 # me.lemire.integercompression.Simple9 # bits per int, compress speed (mis), decompression speed (mis) 
- 15.38 69 281 + 15.38 69 281 diff --git a/benchmarkresults/benchmarkresults_icore7_12november2013.txt b/benchmarkresults/benchmarkresults_icore7_12november2013.txt index 07b11b3..795650e 100644 --- a/benchmarkresults/benchmarkresults_icore7_12november2013.txt +++ b/benchmarkresults/benchmarkresults_icore7_12november2013.txt @@ -10,9 +10,9 @@ Its dependencies (if any) will NOT be available to the current build. [INFO] [enforcer:enforce {execution: enforce-maven}] [INFO] [exec:java {execution: default-cli}] # benchmark based on the ClusterData model from: -# Vo Ngoc Anh and Alistair Moffat. -# Index compression using 64-bit words. -# Softw. Pract. Exper.40, 2 (February 2010), 131-147. +# Vo Ngoc Anh and Alistair Moffat. +# Index compression using 64-bit words. +# Softw. Pract. Exper.40, 2 (February 2010), 131-147. # Results will be written into a CSV file: benchmark-20131112T105209.csv @@ -21,852 +21,852 @@ Its dependencies (if any) will NOT be available to the current build. # generating random data... ok. 
# kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 3.28 48 218 + 3.28 48 218 # IntegratedBinaryPacking + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 3.03 623 1205 + 3.03 623 1205 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1154 1331 + 32.00 1154 1331 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.00 508 554 + 8.00 508 554 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.00 592 709 + 8.00 592 709 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.03 596 900 + 3.03 596 900 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.95 115 701 + 2.95 115 701 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.98 135 726 + 2.98 135 726 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.95 116 726 + 2.95 116 726 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.94 19 761 + 2.94 19 761 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.97 22 767 + 2.97 22 767 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.94 19 765 + 2.94 19 765 # IntegratedFastPFOR + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 2.71 219 797 + 2.71 219 797 # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.71 217 813 + 2.71 217 813 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 2.90 254 599 + 2.90 254 599 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.92 375 669 + 2.92 375 669 # DeltaZigzagBinaryPacking + 
DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.36 394 503 + 3.36 394 503 # sparsity 2 # generating random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 4.09 47 254 + 4.09 47 254 # IntegratedBinaryPacking + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 3.84 636 1160 + 3.84 636 1160 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1152 1264 + 32.00 1152 1264 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.01 510 551 + 8.01 510 551 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.01 594 704 + 8.01 594 704 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.84 602 875 + 3.84 602 875 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.74 101 673 + 3.74 101 673 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.77 117 695 + 3.77 117 695 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.74 101 694 + 3.74 101 694 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.73 15 725 + 3.73 15 725 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.76 18 741 + 3.76 18 741 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.73 15 731 + 3.73 15 731 # IntegratedFastPFOR + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 3.48 215 782 + 3.48 215 782 # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.48 212 789 + 3.48 212 789 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 3.72 190 
530 + 3.72 190 530 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.76 375 657 + 3.76 375 657 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.32 392 499 + 4.32 392 499 # sparsity 3 # generating random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 5.03 42 250 + 5.03 42 250 # IntegratedBinaryPacking + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 4.77 643 1141 + 4.77 643 1141 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1149 1337 + 32.00 1149 1337 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.02 506 547 + 8.02 506 547 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.02 590 698 + 8.02 590 698 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.77 619 904 + 4.77 619 904 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.66 89 640 + 4.66 89 640 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.69 103 672 + 4.69 103 672 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.66 89 668 + 4.66 89 668 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.64 12 700 + 4.64 12 700 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.68 14 712 + 4.68 14 712 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.64 12 704 + 4.64 12 704 # IntegratedFastPFOR + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 4.39 212 762 + 4.39 212 762 # FastPFOR + VariableByte # bits 
per int, compress speed (mis), decompression speed (mis) - 4.39 209 763 + 4.39 209 763 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 4.81 146 480 + 4.81 146 480 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.75 373 646 + 4.75 373 646 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.40 386 496 + 5.40 386 496 # sparsity 4 # generating random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 5.77 39 245 + 5.77 39 245 # IntegratedBinaryPacking + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 5.53 629 1095 + 5.53 629 1095 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1148 1332 + 32.00 1148 1332 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.11 482 522 + 8.11 482 522 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.11 557 655 + 8.11 557 655 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.53 617 889 + 5.53 617 889 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.42 82 659 + 5.42 82 659 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.45 94 684 + 5.45 94 684 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.42 82 686 + 5.42 82 686 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.40 10 695 + 5.40 10 695 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.43 12 715 + 5.43 12 715 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.40 10 705 + 5.40 10 705 # 
IntegratedFastPFOR + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 5.16 214 776 + 5.16 214 776 # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.16 211 780 + 5.16 211 780 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 5.66 128 457 + 5.66 128 457 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.53 370 645 + 5.53 370 645 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.23 389 493 + 6.23 389 493 # sparsity 5 # generating random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 6.57 37 248 + 6.57 37 248 # IntegratedBinaryPacking + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 6.32 640 1113 + 6.32 640 1113 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1150 1349 + 32.00 1150 1349 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.41 416 456 + 8.41 416 456 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.41 473 548 + 8.41 473 548 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.32 622 898 + 6.32 622 898 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.20 75 643 + 6.20 75 643 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.23 86 668 + 6.23 86 668 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.20 75 666 + 6.20 75 666 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.18 8 690 + 6.18 8 690 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), 
decompression speed (mis) - 6.21 11 705 + 6.21 11 705 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.18 8 697 + 6.18 8 697 # IntegratedFastPFOR + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 5.93 211 741 + 5.93 211 741 # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.93 208 772 + 5.93 208 772 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 6.51 118 426 + 6.51 118 426 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.32 374 639 + 6.32 374 639 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.05 390 492 + 7.05 390 492 # sparsity 6 # generating random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 7.73 35 242 + 7.73 35 242 # IntegratedBinaryPacking + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 7.48 630 1071 + 7.48 630 1071 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1143 1350 + 32.00 1143 1350 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.04 328 365 + 9.04 328 365 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.04 365 415 + 9.04 365 415 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.48 620 882 + 7.48 620 882 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.36 67 641 + 7.36 67 641 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.39 76 668 + 7.39 76 668 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.36 67 667 + 7.36 67 667 # OptPFD + 
VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.33 7 679 + 7.33 7 679 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.37 9 695 + 7.37 9 695 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.33 7 686 + 7.33 7 686 # IntegratedFastPFOR + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 7.09 211 749 + 7.09 211 749 # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.09 208 764 + 7.09 208 764 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 7.88 101 383 + 7.88 101 383 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.49 372 630 + 7.49 372 630 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.27 389 489 + 8.27 389 489 # sparsity 7 # generating random data... # generating random data... ok. 
# kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 8.46 33 244 + 8.46 33 244 # IntegratedBinaryPacking + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 8.21 628 1052 + 8.21 628 1052 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1148 1334 + 32.00 1148 1334 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.01 257 290 + 10.01 257 290 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.01 284 315 + 10.01 284 315 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.21 612 859 + 8.21 612 859 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.08 63 626 + 8.08 63 626 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.11 71 665 + 8.11 71 665 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.08 63 663 + 8.08 63 663 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.06 6 675 + 8.06 6 675 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.09 8 687 + 8.09 8 687 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.06 6 682 + 8.06 6 682 # IntegratedFastPFOR + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 7.81 210 756 + 7.81 210 756 # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 7.81 207 759 + 7.81 207 759 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 8.85 92 353 + 8.85 92 353 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.22 369 622 + 8.22 369 622 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # 
bits per int, compress speed (mis), decompression speed (mis) - 9.00 389 486 + 9.00 389 486 # sparsity 8 # generating random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 9.41 32 234 + 9.41 32 234 # IntegratedBinaryPacking + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 9.16 636 1062 + 9.16 636 1062 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1151 1326 + 32.00 1151 1326 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.08 231 269 + 11.08 231 269 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.08 259 288 + 11.08 259 288 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.16 616 873 + 9.16 616 873 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.04 59 638 + 9.04 59 638 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.07 65 664 + 9.07 65 664 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.04 59 664 + 9.04 59 664 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.01 5 665 + 9.01 5 665 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.05 7 680 + 9.05 7 680 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.01 5 671 + 9.01 5 671 # IntegratedFastPFOR + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 8.77 209 746 + 8.77 209 746 # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.77 207 738 + 8.77 207 738 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 10.25 81 324 + 10.25 81 324 # 
XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.18 372 625 + 9.18 372 625 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.98 387 479 + 9.98 387 479 # sparsity 9 # generating random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 10.41 31 238 + 10.41 31 238 # IntegratedBinaryPacking + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 10.15 637 1070 + 10.15 637 1070 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1145 1413 + 32.00 1145 1413 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.15 221 267 + 12.15 221 267 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.15 252 284 + 12.15 252 284 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.15 609 849 + 10.15 609 849 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.03 54 624 + 10.03 54 624 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.06 60 650 + 10.06 60 650 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.03 54 649 + 10.03 54 649 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.00 5 653 + 10.00 5 653 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.03 7 666 + 10.03 7 666 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.00 5 659 + 10.00 5 659 # IntegratedFastPFOR + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 9.75 207 739 + 9.75 207 739 # FastPFOR + VariableByte # bits per int, 
compress speed (mis), decompression speed (mis) - 9.75 206 743 + 9.75 206 743 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 11.72 73 313 + 11.72 73 313 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.17 369 611 + 10.17 369 611 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.98 381 474 + 10.98 381 474 # sparsity 10 # generating random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 11.57 29 236 + 11.57 29 236 # IntegratedBinaryPacking + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 11.28 626 1033 + 11.28 626 1033 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1141 1328 + 32.00 1141 1328 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.18 219 276 + 13.18 219 276 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.18 254 294 + 13.18 254 294 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.28 610 848 + 11.28 610 848 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.19 50 617 + 11.19 50 617 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.23 56 638 + 11.23 56 638 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.19 50 640 + 11.19 50 640 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.13 4 640 + 11.13 4 640 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.17 6 655 + 11.17 6 655 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.13 4 647 + 11.13 4 647 # 
IntegratedFastPFOR + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 10.87 207 736 + 10.87 207 736 # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.87 204 734 + 10.87 204 734 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 13.19 68 311 + 13.19 68 311 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.30 369 612 + 11.30 369 612 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.13 386 477 + 12.13 386 477 # sparsity 11 # generating random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 12.41 29 229 + 12.41 29 229 # IntegratedBinaryPacking + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 12.01 634 1046 + 12.01 634 1046 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1148 1365 + 32.00 1148 1365 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.84 208 261 + 13.84 208 261 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.84 241 277 + 13.84 241 277 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.01 605 832 + 12.01 605 832 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.07 49 650 + 12.07 49 650 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.10 54 674 + 12.10 54 674 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.07 49 675 + 12.07 49 675 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.93 4 604 + 11.93 4 604 # OptPFDS9 + VariableByte # bits per int, 
compress speed (mis), decompression speed (mis) - 11.97 6 618 + 11.97 6 618 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.93 4 611 + 11.93 4 611 # IntegratedFastPFOR + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 11.60 206 724 + 11.60 206 724 # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.60 203 724 + 11.60 203 724 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 13.98 66 291 + 13.98 66 291 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.04 367 603 + 12.04 367 603 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.86 385 478 + 12.86 385 478 # sparsity 12 # generating random data... # generating random data... ok. # kamikaze PForDelta # bits per int, compress speed (mis), decompression speed (mis) - 13.48 28 236 + 13.48 28 236 # IntegratedBinaryPacking + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 12.96 634 1051 + 12.96 634 1051 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1150 1307 + 32.00 1150 1307 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 14.69 202 258 + 14.69 202 258 # IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 14.69 235 272 + 14.69 235 272 # BinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.96 610 849 + 12.96 610 849 # NewPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.17 48 698 + 13.17 48 698 # NewPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.20 52 714 + 13.20 52 714 # NewPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed 
(mis) - 13.17 48 720 + 13.17 48 720 # OptPFD + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.96 4 588 + 12.96 4 588 # OptPFDS9 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.00 6 602 + 13.00 6 602 # OptPFDS16 + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.96 4 597 + 12.96 4 597 # IntegratedFastPFOR + IntegratedVariableByte (Integrated) # bits per int, compress speed (mis), decompression speed (mis) - 12.55 206 726 + 12.55 206 726 # FastPFOR + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.55 203 725 + 12.55 203 725 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 15.40 63 269 + 15.40 63 269 # XorBinaryPacking + VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.99 368 613 + 12.99 368 613 # DeltaZigzagBinaryPacking + DeltaZigzagVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.81 384 476 + 13.81 384 476 Results were written into a CSV file: benchmark-20131112T105209.csv diff --git a/benchmarkresults/benchmarkresults_skippable_haswell_18sept2014.txt b/benchmarkresults/benchmarkresults_skippable_haswell_18sept2014.txt index 4159637..7e35696 100644 --- a/benchmarkresults/benchmarkresults_skippable_haswell_18sept2014.txt +++ b/benchmarkresults/benchmarkresults_skippable_haswell_18sept2014.txt @@ -1,7 +1,7 @@ # benchmark based on the ClusterData model from: -# Vo Ngoc Anh and Alistair Moffat. -# Index compression using 64-bit words. -# Softw. Pract. Exper.40, 2 (February 2010), 131-147. +# Vo Ngoc Anh and Alistair Moffat. +# Index compression using 64-bit words. +# Softw. Pract. Exper.40, 2 (February 2010), 131-147. # Results will be written into a CSV file: benchmark-20140918T011322.csv @@ -10,504 +10,504 @@ # generating random data... ok. 
# IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.04 840 1619 + 3.04 840 1619 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1169 1698 + 32.00 1169 1698 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.00 195 369 + 8.00 195 369 # BinaryPacking+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.04 651 1148 + 3.04 651 1148 # NewPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.96 129 865 + 2.96 129 865 # NewPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.98 158 877 + 2.98 158 877 # NewPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.96 130 879 + 2.96 130 879 # OptPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.95 25 930 + 2.95 25 930 # OptPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.97 27 951 + 2.97 27 951 # OptPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.95 25 935 + 2.95 25 935 # FastPFOR+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 2.82 235 928 + 2.82 235 928 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 2.93 255 740 + 2.93 255 740 # Simple16 # bits per int, compress speed (mis), decompression speed (mis) - 2.77 147 395 + 2.77 147 395 # sparsity 2 # generating random data... # generating random data... ok. 
# IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.82 831 1555 + 3.82 831 1555 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1183 1800 + 32.00 1183 1800 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.00 220 372 + 8.00 220 372 # BinaryPacking+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.82 659 1139 + 3.82 659 1139 # NewPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.72 116 855 + 3.72 116 855 # NewPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.75 136 851 + 3.75 136 851 # NewPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.72 115 853 + 3.72 115 853 # OptPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.71 19 895 + 3.71 19 895 # OptPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.74 22 917 + 3.74 22 917 # OptPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.71 19 900 + 3.71 19 900 # FastPFOR+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 3.59 230 908 + 3.59 230 908 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 3.74 195 654 + 3.74 195 654 # Simple16 # bits per int, compress speed (mis), decompression speed (mis) - 3.49 111 366 + 3.49 111 366 # sparsity 3 # generating random data... # generating random data... ok. 
# IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.78 817 1519 + 4.78 817 1519 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1019 1759 + 32.00 1019 1759 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.02 238 370 + 8.02 238 370 # BinaryPacking+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.78 680 1121 + 4.78 680 1121 # NewPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.67 98 825 + 4.67 98 825 # NewPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.70 123 840 + 4.70 123 840 # NewPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.67 102 834 + 4.67 102 834 # OptPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.66 15 861 + 4.66 15 861 # OptPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.69 18 895 + 4.69 18 895 # OptPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.66 14 871 + 4.66 14 871 # FastPFOR+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 4.54 231 904 + 4.54 231 904 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 4.84 153 589 + 4.84 153 589 # Simple16 # bits per int, compress speed (mis), decompression speed (mis) - 4.40 83 339 + 4.40 83 339 # sparsity 4 # generating random data... # generating random data... ok. 
# IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.65 788 1505 + 5.65 788 1505 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1142 1757 + 32.00 1142 1757 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.09 242 363 + 8.09 242 363 # BinaryPacking+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.65 636 1113 + 5.65 636 1113 # NewPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.52 92 828 + 5.52 92 828 # NewPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.56 112 826 + 5.56 112 826 # NewPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.52 94 826 + 5.52 94 826 # OptPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.51 12 854 + 5.51 12 854 # OptPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.54 15 883 + 5.54 15 883 # OptPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.51 12 858 + 5.51 12 858 # FastPFOR+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 5.39 218 886 + 5.39 218 886 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 5.80 136 566 + 5.80 136 566 # Simple16 # bits per int, compress speed (mis), decompression speed (mis) - 5.32 68 319 + 5.32 68 319 # sparsity 5 # generating random data... # generating random data... ok. 
# IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.31 804 1490 + 6.31 804 1490 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1105 1860 + 32.00 1105 1860 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.40 245 330 + 8.40 245 330 # BinaryPacking+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.31 673 1121 + 6.31 673 1121 # NewPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.19 87 832 + 6.19 87 832 # NewPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.22 107 844 + 6.22 107 844 # NewPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.19 88 830 + 6.19 88 830 # OptPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.17 10 851 + 6.17 10 851 # OptPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.20 14 883 + 6.20 14 883 # OptPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.17 10 852 + 6.17 10 852 # FastPFOR+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.07 217 875 + 6.07 217 875 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 6.51 130 513 + 6.51 130 513 # Simple16 # bits per int, compress speed (mis), decompression speed (mis) - 6.13 60 307 + 6.13 60 307 # sparsity 6 # generating random data... # generating random data... ok. 
# IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.99 742 1431 + 6.99 742 1431 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1163 1660 + 32.00 1163 1660 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.99 205 290 + 8.99 205 290 # BinaryPacking+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.99 637 1107 + 6.99 637 1107 # NewPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.87 82 821 + 6.87 82 821 # NewPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.90 100 830 + 6.90 100 830 # NewPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.87 82 818 + 6.87 82 818 # OptPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.85 9 834 + 6.85 9 834 # OptPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.88 12 865 + 6.88 12 865 # OptPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.85 9 836 + 6.85 9 836 # FastPFOR+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 6.75 224 877 + 6.75 224 877 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 7.33 118 485 + 7.33 118 485 # Simple16 # bits per int, compress speed (mis), decompression speed (mis) - 6.98 54 296 + 6.98 54 296 # sparsity 7 # generating random data... # generating random data... ok. 
# IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.31 770 1463 + 8.31 770 1463 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1074 1832 + 32.00 1074 1832 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.01 203 240 + 10.01 203 240 # BinaryPacking+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.31 664 1105 + 8.31 664 1105 # NewPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.18 73 796 + 8.18 73 796 # NewPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.22 88 808 + 8.22 88 808 # NewPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.18 73 792 + 8.18 73 792 # OptPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.16 7 819 + 8.16 7 819 # OptPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.20 10 849 + 8.20 10 849 # OptPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.16 7 810 + 8.16 7 810 # FastPFOR+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.05 217 851 + 8.05 217 851 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 9.01 103 430 + 9.01 103 430 # Simple16 # bits per int, compress speed (mis), decompression speed (mis) - 8.61 47 277 + 8.61 47 277 # sparsity 8 # generating random data... # generating random data... ok. 
# IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.88 800 1414 + 8.88 800 1414 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1078 1718 + 32.00 1078 1718 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.91 211 227 + 10.91 211 227 # BinaryPacking+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.88 671 1083 + 8.88 671 1083 # NewPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.76 70 804 + 8.76 70 804 # NewPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.80 84 814 + 8.80 84 814 # NewPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.76 70 800 + 8.76 70 800 # OptPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.73 7 807 + 8.73 7 807 # OptPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.77 9 792 + 8.77 9 792 # OptPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.73 7 801 + 8.73 7 801 # FastPFOR+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 8.64 211 837 + 8.64 211 837 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 9.94 96 417 + 9.94 96 417 # Simple16 # bits per int, compress speed (mis), decompression speed (mis) - 9.51 44 268 + 9.51 44 268 # sparsity 9 # generating random data... # generating random data... ok. 
# IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.19 834 1442 + 10.19 834 1442 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1200 1632 + 32.00 1200 1632 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.16 206 212 + 12.16 206 212 # BinaryPacking+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.19 675 1092 + 10.19 675 1092 # NewPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.07 64 804 + 10.07 64 804 # NewPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.10 76 814 + 10.10 76 814 # NewPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.07 63 802 + 10.07 63 802 # OptPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.04 6 810 + 10.04 6 810 # OptPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.08 9 841 + 10.08 9 841 # OptPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.04 6 808 + 10.04 6 808 # FastPFOR+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 9.94 222 858 + 9.94 222 858 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 11.79 88 397 + 11.79 88 397 # Simple16 # bits per int, compress speed (mis), decompression speed (mis) - 11.40 38 253 + 11.40 38 253 # sparsity 10 # generating random data... # generating random data... ok. 
# IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.10 814 1406 + 11.10 814 1406 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1215 1820 + 32.00 1215 1820 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.07 207 208 + 13.07 207 208 # BinaryPacking+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.10 681 1073 + 11.10 681 1073 # NewPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.00 60 800 + 11.00 60 800 # NewPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.04 72 809 + 11.04 72 809 # NewPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 11.00 60 796 + 11.00 60 796 # OptPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.95 6 785 + 10.95 6 785 # OptPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.99 8 815 + 10.99 8 815 # OptPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.95 6 782 + 10.95 6 782 # FastPFOR+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 10.84 226 860 + 10.84 226 860 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 12.93 84 389 + 12.93 84 389 # Simple16 # bits per int, compress speed (mis), decompression speed (mis) - 12.58 36 245 + 12.58 36 245 # sparsity 11 # generating random data... # generating random data... ok. 
# IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.31 814 1392 + 12.31 814 1392 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1171 1846 + 32.00 1171 1846 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 14.14 172 201 + 14.14 172 201 # BinaryPacking+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.31 668 1071 + 12.31 668 1071 # NewPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.42 56 825 + 12.42 56 825 # NewPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.45 67 832 + 12.45 67 832 # NewPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.42 56 821 + 12.42 56 821 # OptPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.22 5 729 + 12.22 5 729 # OptPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.27 8 758 + 12.27 8 758 # OptPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.22 5 731 + 12.22 5 731 # FastPFOR+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.07 222 836 + 12.07 222 836 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 14.31 81 377 + 14.31 81 377 # Simple16 # bits per int, compress speed (mis), decompression speed (mis) - 14.05 33 238 + 14.05 33 238 # sparsity 12 # generating random data... # generating random data... ok. 
# IntegratedBinaryPacking + IntegratedVariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.97 805 1375 + 12.97 805 1375 # JustCopy # bits per int, compress speed (mis), decompression speed (mis) - 32.00 1160 1737 + 32.00 1160 1737 # VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 14.72 186 193 + 14.72 186 193 # BinaryPacking+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.97 656 1037 + 12.97 656 1037 # NewPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.22 56 886 + 13.22 56 886 # NewPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.24 67 891 + 13.24 67 891 # NewPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.22 56 883 + 13.22 56 883 # OptPFD+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.98 5 704 + 12.98 5 704 # OptPFDS9+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 13.02 8 740 + 13.02 8 740 # OptPFDS16+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.98 5 704 + 12.98 5 704 # FastPFOR+VariableByte # bits per int, compress speed (mis), decompression speed (mis) - 12.73 223 845 + 12.73 223 845 # Simple9 # bits per int, compress speed (mis), decompression speed (mis) - 15.35 78 347 + 15.35 78 347 # Simple16 # bits per int, compress speed (mis), decompression speed (mis) - 15.15 31 225 + 15.15 31 225 Results were written into a CSV file: benchmark-20140918T011322.csv diff --git a/build.xml b/build.xml index 974a14c..d02cddd 100644 --- a/build.xml +++ b/build.xml @@ -8,6 +8,16 @@ + + + + + + + + + + diff --git a/example.java b/example.java index 6569ebd..75dfb05 100644 --- a/example.java +++ b/example.java @@ -88,8 +88,7 @@ public static void basicExample() { /** * Like the basicExample, but we store the input array size manually. 
*/ - @Test - public void basicExampleHeadless() { + public static void basicExampleHeadless() { int[] data = new int[2342351]; System.out.println("Compressing " + data.length + " integers in one go using the headless approach"); // data should be sorted for best @@ -105,11 +104,7 @@ public void basicExampleHeadless() { // be processed using variable byte SkippableIntegratedComposition codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(), new IntegratedVariableByte()); - // output vector should be large enough... - int[] compressed = new int[data.length + 1024]; - // compressed might not be large enough in some cases - // if you get java.lang.ArrayIndexOutOfBoundsException, try - // allocating more memory + int[] compressed = new int[codec.maxHeadlessCompressedLength(new IntWrapper(0), data.length)]; /** * @@ -118,7 +113,7 @@ public void basicExampleHeadless() { */ IntWrapper inputoffset = new IntWrapper(0); IntWrapper outputoffset = new IntWrapper(1); - compressed[0] = data.length; // we manually store how many integers we + compressed[0] = data.length; // we manually store how many integers codec.headlessCompress(data, inputoffset, data.length, compressed, outputoffset, new IntWrapper(0)); // got it! 
// inputoffset should be at data.length but outputoffset tells @@ -268,10 +263,12 @@ public static void headlessDemo() { int[] uncompressed1 = {1,2,1,3,1}; int[] uncompressed2 = {3,2,4,6,1}; - int[] compressed = new int[uncompressed1.length+uncompressed2.length+1024]; - SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte()); + int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed1.length) + + codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed2.length); + int[] compressed = new int[maxCompressedLength]; + // compressing IntWrapper outPos = new IntWrapper(); diff --git a/examples/vector/Example.java b/examples/vector/Example.java new file mode 100644 index 0000000..e8d2455 --- /dev/null +++ b/examples/vector/Example.java @@ -0,0 +1,67 @@ +// Copyright (C) 2022 Intel Corporation + +// SPDX-License-Identifier: Apache-2.0 + +import java.util.Arrays; +import me.lemire.integercompression.FastPFOR; +import me.lemire.integercompression.IntWrapper; +import me.lemire.integercompression.Composition; +import me.lemire.integercompression.IntegerCODEC; +import me.lemire.integercompression.VariableByte; +import me.lemire.integercompression.vector.VectorFastPFOR; + +public class Example { + public static void main(String[] args) { + if (args.length == 0) + throw new IllegalArgumentException(); + + // pass 0 for Vector compressor , non-zero for default compressor + int compressorToUse = Integer.parseInt(args[0]); + + final int N = 1310720; + int[] data = new int[N]; + + // 2-bit data + for (int k = 0; k < N; k += 1) + data[k] = 3; + + // a few large values + for (int k = 0; k < N; k += 5) + data[k] = 100; + for (int k = 0; k < N; k += 533) + data[k] = 10000; + + int[] compressed = new int[N + 1024]; + + IntegerCODEC codec = new Composition( + compressorToUse == 0 ? 
new VectorFastPFOR() : new FastPFOR(), + new VariableByte()); + + IntWrapper inputoffset = new IntWrapper(0); + IntWrapper outputoffset = new IntWrapper(0); + + codec.compress(data, inputoffset, data.length, compressed, outputoffset); + + System.out.println("compressed unsorted integers from " + + data.length * 4 / 1024 + "KB to " + + outputoffset.intValue() * 4 / 1024 + "KB"); + + compressed = Arrays.copyOf(compressed, outputoffset.intValue()); + + int[] recovered = new int[N]; + IntWrapper recoffset = new IntWrapper(0); + + codec.uncompress(compressed, new IntWrapper(0), compressed.length, + recovered, recoffset); + + System.out.println("compressed length = " + compressed.length + + ", uncompressed length = " + recoffset.intValue()); + + if (Arrays.equals(data, recovered)) + System.out.println("data is recovered without loss"); + else + throw new RuntimeException("bug"); // could use assert + + System.out.println(); + } +} diff --git a/examples/vector/README.md b/examples/vector/README.md new file mode 100644 index 0000000..cbcbfeb --- /dev/null +++ b/examples/vector/README.md @@ -0,0 +1,12 @@ +Compile +------- +``` +javac -cp Example.java +``` + +Run +--- +``` +java --add-modules jdk.incubator.vector -cp Example 0 +``` + diff --git a/jitpack.yml b/jitpack.yml new file mode 100644 index 0000000..255e0f4 --- /dev/null +++ b/jitpack.yml @@ -0,0 +1,5 @@ +jdk: + - openjdk21 +before_install: + - sdk install java 21-open + - sdk use java 21-open diff --git a/pom.xml b/pom.xml index 7a36b12..33db8e6 100644 --- a/pom.xml +++ b/pom.xml @@ -2,12 +2,14 @@ 4.0.0 me.lemire.integercompression JavaFastPFOR - 0.1.13-SNAPSHOT + 0.3.11-SNAPSHOT jar - 1.6 - 1.6 + 21 + 21 + 21 UTF-8 + 0.8.5 @@ -18,18 +20,31 @@ - scm:git:git@github.com:lemire/JavaFastPFOR.git - scm:git:git@github.com:lemire/JavaFastPFOR.git - scm:git:git@github.com:lemire/JavaFastPFOR.git + scm:git:https://github.com/fast-pack/JavaFastPFOR.git + scm:git:https://github.com/fast-pack/JavaFastPFOR.git + 
scm:git:https://github.com/fast-pack/JavaFastPFOR.git + HEAD + + + + sonatype-central-portal + Sonatype Central Portal + https://central.sonatype.com/repository/maven-snapshots/ + + + sonatype-central-portal + Sonatype Central Portal + https://repo.maven.apache.org/maven2/ + + + lemire Daniel Lemire - lemire@gmail.com + daniel@lemire.me http://lemire.me/en/ - LICEF Research Center - http://licef.ca architect developer @@ -45,21 +60,64 @@ junit junit - 4.10 + 4.13.1 + test + + + org.roaringbitmap + RoaringBitmap + 0.9.35 test GitHub Issue Tracking - https://github.com/lemire/JavaFastPFOR/issues + https://github.com/fast-pack/JavaFastPFOR/issues - - org.sonatype.oss - oss-parent - 9 - + + + eu.maveniverse.maven.njord + extension3 + ${njord.version} + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.12.1 + + 21 + 21 + + + + default-compile + compile + + compile + + + + me/lemire/integercompression/vector/* + module-info.java + + + + + + + org.apache.felix maven-bundle-plugin @@ -78,31 +136,14 @@ 1.1 me.lemire.integercompression.benchmarktools.Benchmark - - - org.apache.maven.plugins - maven-gpg-plugin - 1.4 - - - sign-artifacts - verify - - sign - - - - org.apache.maven.plugins maven-javadoc-plugin - 2.8 + 3.6.3 - com.kamikaze.pfordelta:me.lemire.integercompression.benchmarktools + me.lemire.integercompression.vector;com.kamikaze.pfordelta:me.lemire.integercompression.benchmarktools @@ -129,7 +170,7 @@ org.jacoco jacoco-maven-plugin - 0.7.8 + 0.8.13 me/lemire/integercompression/Kamikaze @@ -147,16 +188,79 @@ - org.eluder.coveralls - coveralls-maven-plugin - 3.2.1 + org.apache.maven.plugins + maven-release-plugin + 3.0.1 + + deploy + true + + + + org.apache.maven.plugins + maven-gpg-plugin + 3.2.8 + + + sign-artifacts + verify + + sign + + + + + + + + eu.maveniverse.maven.plugins + njord + ${njord.version} + + + maven-clean-plugin + 2.5 + + + maven-deploy-plugin + 2.8.1 + + + maven-install-plugin + 2.5.1 + + + maven-jar-plugin + 2.4 + + + maven-javadoc-plugin 
+ 2.9.1 + + + maven-resources-plugin + 2.6 + + + maven-site-plugin + 3.3 + + + maven-source-plugin + 2.2.1 + + + maven-surefire-plugin + 2.17 + + + JavaFastPFOR - https://github.com/lemire/JavaFastPFOR/ + https://github.com/fast-pack/JavaFastPFOR/ -It is a library to compress and uncompress arrays of integers -very fast. The assumption is that most (but not all) values in -your array use less than 32 bits. +A library to compress and uncompress arrays of integers +very quickly. diff --git a/src/main/java/me/lemire/integercompression/BinaryPacking.java b/src/main/java/me/lemire/integercompression/BinaryPacking.java index 8d5ff90..ce37ff0 100644 --- a/src/main/java/me/lemire/integercompression/BinaryPacking.java +++ b/src/main/java/me/lemire/integercompression/BinaryPacking.java @@ -37,8 +37,9 @@ * @author Daniel Lemire */ public final class BinaryPacking implements IntegerCODEC, SkippableIntegerCODEC { - final static int BLOCK_SIZE = 32; - + public final static int BLOCK_SIZE = 32; + private static final int MAX_BIT_WIDTH = Integer.SIZE; + @Override public void compress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { @@ -131,7 +132,16 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, outpos.add(outlength); inpos.set(tmpinpos); } - + + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + int blockCount = inlength / BLOCK_SIZE; + int headersSizeInInts = blockCount / Integer.BYTES + (blockCount % Integer.BYTES); + int blocksSizeInInts = blockCount * MAX_BIT_WIDTH; + compressedPositions.add(blockCount * BLOCK_SIZE); + return headersSizeInInts + blocksSizeInInts; + } + @Override public String toString() { return this.getClass().getSimpleName(); diff --git a/src/main/java/me/lemire/integercompression/BitPacking.java b/src/main/java/me/lemire/integercompression/BitPacking.java index e83c9e0..8652be4 100644 --- a/src/main/java/me/lemire/integercompression/BitPacking.java +++ 
b/src/main/java/me/lemire/integercompression/BitPacking.java @@ -1690,7 +1690,7 @@ protected static void fastpack9(final int[] in, int inpos, } /** - * Unpack 32 integers + * Pack without mask 32 integers * * @param in * source array @@ -3005,7 +3005,7 @@ protected static void fastpackwithoutmask9(final int[] in, int inpos, } /** - * Pack the 32 integers + * Unpack the 32 integers * * @param in * source array diff --git a/src/main/java/me/lemire/integercompression/ByteIntegerCODEC.java b/src/main/java/me/lemire/integercompression/ByteIntegerCODEC.java index 47d4f57..6e8f903 100644 --- a/src/main/java/me/lemire/integercompression/ByteIntegerCODEC.java +++ b/src/main/java/me/lemire/integercompression/ByteIntegerCODEC.java @@ -18,9 +18,9 @@ public interface ByteIntegerCODEC { * Compress data from an array to another array. * * Both inpos and outpos are modified to represent how much data was - * read and written to if 12 ints (inlength = 12) are compressed to 3 + * read and written to. If 12 ints (inlength = 12) are compressed to 3 * bytes, then inpos will be incremented by 12 while outpos will be - * incremented by 3 we use IntWrapper to pass the values by reference. + * incremented by 3. We use IntWrapper to pass the values by reference. * * @param in * input array diff --git a/src/main/java/me/lemire/integercompression/DeltaZigzagVariableByte.java b/src/main/java/me/lemire/integercompression/DeltaZigzagVariableByte.java index 4b2f896..2f8c709 100644 --- a/src/main/java/me/lemire/integercompression/DeltaZigzagVariableByte.java +++ b/src/main/java/me/lemire/integercompression/DeltaZigzagVariableByte.java @@ -105,7 +105,7 @@ public void uncompress(int[] inBuf, IntWrapper inPos, int inLen, int ip = inPos.get(); int op = outPos.get(); - int vbcNum = 0, vbcShift = 24; // Varialbe Byte Context. + int vbcNum = 0, vbcShift = 24; // Variable Byte Context. final int inPosLast = ip + inLen; while (ip < inPosLast) { // Fetch a byte value. 
@@ -134,6 +134,9 @@ public void uncompress(int[] inBuf, IntWrapper inPos, int inLen, * In case you need a different way to allocate buffers, you can override this method * with a custom behavior. The default implementation allocates a new Java direct * {@link ByteBuffer} on each invocation. + * + * @param sizeInBytes + * @return */ protected ByteBuffer makeBuffer(int sizeInBytes) { return ByteBuffer.allocateDirect(sizeInBytes); diff --git a/src/main/java/me/lemire/integercompression/FastPFOR.java b/src/main/java/me/lemire/integercompression/FastPFOR.java index 36226c0..5475496 100644 --- a/src/main/java/me/lemire/integercompression/FastPFOR.java +++ b/src/main/java/me/lemire/integercompression/FastPFOR.java @@ -40,6 +40,13 @@ */ public class FastPFOR implements IntegerCODEC,SkippableIntegerCODEC { final static int OVERHEAD_OF_EACH_EXCEPT = 8; + private static final int OVERHEAD_OF_EACH_PAGE_IN_INTS = 36; // 1 int for the header + // 1 int for the byte array size + // 1 int for the bitmap + // 1 int for byte array padding (to align to 4 bytes) + // 32 to have enough space to bit-pack the exceptions + private static final int OVERHEAD_OF_EACH_BLOCK_IN_INTS = 1; // 1 byte for the number of bits allocated per truncated integer + // 1 byte for the number of exceptions /** * */ @@ -65,7 +72,7 @@ public class FastPFOR implements IntegerCODEC,SkippableIntegerCODEC { * @param pagesize * the desired page size (recommended value is FastPFOR.DEFAULT_PAGE_SIZE) */ - private FastPFOR(int pagesize) { + FastPFOR(int pagesize) { pageSize = pagesize; // Initiate arrrays. 
byteContainer = makeBuffer(3 * pageSize @@ -230,6 +237,18 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, } } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + + int pageCount = (inlength + pageSize - 1) / pageSize; + int blockCount = inlength / BLOCK_SIZE; + + // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers. + int blockSizeInInts = OVERHEAD_OF_EACH_BLOCK_IN_INTS + BLOCK_SIZE; + return OVERHEAD_OF_EACH_PAGE_IN_INTS * pageCount + blockSizeInInts * blockCount + 24; + } + private void decodePage(int[] in, IntWrapper inpos, int[] out, IntWrapper outpos, int thissize) { final int initpos = inpos.get(); @@ -336,6 +355,9 @@ public String toString() { * In case you need a different way to allocate buffers, you can override this method * with a custom behavior. The default implementation allocates a new Java direct * {@link ByteBuffer} on each invocation. 
+ * + * @param sizeInBytes + * @return */ protected ByteBuffer makeBuffer(int sizeInBytes) { return ByteBuffer.allocateDirect(sizeInBytes); diff --git a/src/main/java/me/lemire/integercompression/FastPFOR128.java b/src/main/java/me/lemire/integercompression/FastPFOR128.java index b124072..0557c62 100644 --- a/src/main/java/me/lemire/integercompression/FastPFOR128.java +++ b/src/main/java/me/lemire/integercompression/FastPFOR128.java @@ -23,6 +23,13 @@ */ public class FastPFOR128 implements IntegerCODEC,SkippableIntegerCODEC { final static int OVERHEAD_OF_EACH_EXCEPT = 8; + private static final int OVERHEAD_OF_EACH_PAGE_IN_INTS = 36; // 1 int for the header + // 1 int for the byte array size + // 1 int for the bitmap + // 1 int for byte array padding (to align to 4 bytes) + // 32 to have enough space to bit-pack the exceptions + private static final int OVERHEAD_OF_EACH_BLOCK_IN_INTS = 1; // 1 byte for the number of bits allocated per truncated integer + // 1 byte for the number of exceptions /** * */ @@ -209,6 +216,18 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, } } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + + int pageCount = (inlength + pageSize - 1) / pageSize; + int blockCount = inlength / BLOCK_SIZE; + + // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers. + int blockSizeInInts = OVERHEAD_OF_EACH_BLOCK_IN_INTS + BLOCK_SIZE; + return OVERHEAD_OF_EACH_PAGE_IN_INTS * pageCount + blockSizeInInts * blockCount + 24; + } + private void decodePage(int[] in, IntWrapper inpos, int[] out, IntWrapper outpos, int thissize) { final int initpos = inpos.get(); @@ -317,6 +336,9 @@ public String toString() { * In case you need a different way to allocate buffers, you can override this method * with a custom behavior. 
The default implementation allocates a new Java direct * {@link ByteBuffer} on each invocation. + * + * @param sizeInBytes + * @return */ protected ByteBuffer makeBuffer(int sizeInBytes) { return ByteBuffer.allocateDirect(sizeInBytes); diff --git a/src/main/java/me/lemire/integercompression/GroupSimple9.java b/src/main/java/me/lemire/integercompression/GroupSimple9.java index 0ce10ce..bd8acfa 100644 --- a/src/main/java/me/lemire/integercompression/GroupSimple9.java +++ b/src/main/java/me/lemire/integercompression/GroupSimple9.java @@ -13,3540 +13,3546 @@ public final class GroupSimple9 implements IntegerCODEC, SkippableIntegerCODEC { - private static final int[][] M = { { 0, 1, 2, 3, 4, 5, 6, 7, 8 }, { 9, 10, 11, 12, 13, 14, 15, 16, 17 }, - { 18, 19, 20, 21, 22, 23, 24, 25, 26 }, { 27, 28, 29, 30, 31, 32, 33, 34, 35 }, - { 36, 37, 38, 39, 40, 41, 42, 43, 44 }, { 45, 46, 47, 48, 49, 50, 51, 52, 53 }, - { 54, 55, 56, 57, 58, 59, 60, 61, 62 }, { 63, 64, 65, 66, 67, 68, 69, 70, 71 }, - { 72, 73, 74, 75, 76, 77, 78, 79, 80 } }; - - @Override - public void compress(int[] in, IntWrapper inpos, int inlength, int out[], IntWrapper outpos) { - if (inlength == 0) - return; - out[outpos.get()] = inlength; - outpos.increment(); - headlessCompress(in, inpos, inlength, out, outpos); - } - - private void encode0(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 24; i++) - out[outf + 0] = (out[outf + 0] << 1) + (in[inf + i]); - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; - for (int i = 0; i < 28; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 28 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode1(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 24; i++) - out[outf + 0] = (out[outf + 0] << 1) + in[inf + i]; - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + 
i]; - for (int i = 0; i < 14; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 28 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode2(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 24; i++) - out[outf + 0] = (out[outf + 0] << 1) + in[inf + i]; - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; - for (int i = 0; i < 9; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 28 + i];// 第二个28位是低位存储的,所以浪费的1比特在最顶端。 - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode3(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 24; i++) - out[outf + 0] = (out[outf + 0] << 1) + in[inf + i]; - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; - for (int i = 0; i < 7; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 28 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode4(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 24; i++) - out[outf + 0] = (out[outf + 0] << 1) + in[inf + i]; - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; - for (int i = 0; i < 5; i++) - out[outf + 1] = (out[outf + 1] << 5) + in[inf + 28 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode5(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 24; i++) - out[outf + 0] = (out[outf + 0] << 1) + in[inf + i]; - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 7) + in[inf + 28 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode6(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i 
< 24; i++) - out[outf + 0] = (out[outf + 0] << 1) + in[inf + i]; - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; - for (int i = 0; i < 3; i++) - out[outf + 1] = (out[outf + 1] << 9) + in[inf + 28 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode7(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 24; i++) - out[outf + 0] = (out[outf + 0] << 1) + in[inf + i]; - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 14) + in[inf + 28 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode8(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 24; i++) - out[outf + 0] = (out[outf + 0] << 1) + in[inf + i]; - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 28) + in[inf + 28 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode9(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 12; i++) - out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; - for (int i = 0; i < 28; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 14 + i]; - - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode10(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 12; i++) { - out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; - - } - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; - for (int i = 0; i < 14; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 14 + i]; - - out[outf + 0] = code << 24 | out[outf + 
0]; - - } - - private void encode11(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 12; i++) - out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; - for (int i = 0; i < 9; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 14 + i]; - - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode12(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 12; i++) - out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; - for (int i = 0; i < 7; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 14 + i]; - - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode13(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 12; i++) - out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; - for (int i = 0; i < 5; i++) - out[outf + 1] = (out[outf + 1] << 5) + in[inf + 14 + i]; - - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode14(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 12; i++) - out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 7) + in[inf + 14 + i]; - - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode15(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 12; i++) - out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + 
i]; - for (int i = 0; i < 3; i++) - out[outf + 1] = (out[outf + 1] << 9) + in[inf + 14 + i]; - - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode16(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 12; i++) - out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 14) + in[inf + 14 + i]; - - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode17(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 12; i++) - out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 28) + in[inf + 14 + i]; - - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode18(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 8; i++) - out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; - for (int i = 0; i < 28; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 9 + i]; - - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode19(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 8; i++) - out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; - for (int i = 0; i < 14; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 9 + i]; - - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode20(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 8; i++) - 
out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; - for (int i = 0; i < 9; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 9 + i]; - - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode21(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 8; i++) - out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; - for (int i = 0; i < 7; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 9 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode22(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 8; i++) - out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; - for (int i = 0; i < 5; i++) - out[outf + 1] = (out[outf + 1] << 5) + in[inf + 9 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode23(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 8; i++) - out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 7) + in[inf + 9 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode24(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 8; i++) - out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; - for (int i = 0; i < 3; i++) - out[outf + 1] = (out[outf + 1] << 9) + in[inf + 9 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void 
encode25(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 8; i++) - out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 14) + in[inf + 9 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode26(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 8; i++) - out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 28) + in[inf + 9 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode27(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 6; i++) - out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; - for (int i = 0; i < 28; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 7 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode28(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 6; i++) - out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; - for (int i = 0; i < 14; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 7 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode29(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 6; i++) - out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; - for (int i = 0; i < 9; i++) - out[outf + 
1] = (out[outf + 1] << 3) + in[inf + 7 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode30(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 6; i++) - out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; - for (int i = 0; i < 7; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 7 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode31(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 6; i++) - out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; - for (int i = 0; i < 5; i++) - out[outf + 1] = (out[outf + 1] << 5) + in[inf + 7 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode32(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 6; i++) - out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 7) + in[inf + 7 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode33(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 6; i++) - out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; - for (int i = 0; i < 3; i++) - out[outf + 1] = (out[outf + 1] << 9) + in[inf + 7 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode34(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 6; i++) - out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; - for (int i = 0; i 
< 1; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 14) + in[inf + 7 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode35(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 6; i++) - out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 28) + in[inf + 7 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode36(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 4; i++) - out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); - out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); - for (int i = 0; i < 28; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 5 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode37(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 4; i++) - out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); - out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); - for (int i = 0; i < 14; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 5 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode38(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 4; i++) - out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); - out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); - for (int i = 0; i < 9; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 5 + i]; - out[outf + 0] 
= code << 24 | out[outf + 0]; - - } - - private void encode39(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 4; i++) - out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); - out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); - for (int i = 0; i < 7; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 5 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode40(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 4; i++) - out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); - out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); - for (int i = 0; i < 5; i++) - out[outf + 1] = (out[outf + 1] << 5) + in[inf + 5 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode41(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 4; i++) - out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); - out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 7) + in[inf + 5 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode42(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 4; i++) - out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); - out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); - for (int i = 0; i < 3; i++) - out[outf + 1] = (out[outf + 1] << 9) + in[inf + 5 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode43(final int[] in, final int inf, final int code, final int[] out, - 
final int outf) { - for (int i = 0; i < 4; i++) - out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); - out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 14) + in[inf + 5 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode44(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 4; i++) - out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); - out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 28) + in[inf + 5 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode45(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 3; i++) - out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); - for (int i = 0; i < 28; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 4 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode46(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 3; i++) - out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); - for (int i = 0; i < 14; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 4 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode47(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 3; i++) - out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; - out[outf + 0] = 
(out[outf + 0] << 3) + (in[inf + 3] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); - for (int i = 0; i < 9; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 4 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode48(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 3; i++) - out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); - for (int i = 0; i < 7; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 4 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode49(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 3; i++) - out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); - for (int i = 0; i < 5; i++) - out[outf + 1] = (out[outf + 1] << 5) + in[inf + 4 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode50(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 3; i++) - out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 7) + in[inf + 4 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode51(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 3; i++) - out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); - for (int i = 
0; i < 3; i++) - out[outf + 1] = (out[outf + 1] << 9) + in[inf + 4 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode52(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 3; i++) - out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 14) + in[inf + 4 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode53(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 3; i++) - out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 28) + in[inf + 4 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode54(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 2; i++) - out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); - out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); - for (int i = 0; i < 28; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 3 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode55(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 2; i++) - out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); - out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); - for (int i = 0; i < 14; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 3 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - 
- private void encode56(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 2; i++) - out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); - out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); - for (int i = 0; i < 9; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 3 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode57(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 2; i++) - out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); - out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); - for (int i = 0; i < 7; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 3 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode58(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 2; i++) - out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); - out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); - for (int i = 0; i < 5; i++) - out[outf + 1] = (out[outf + 1] << 5) + in[inf + 3 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode59(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 2; i++) - out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); - out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 7) + in[inf + 3 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode60(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 
2; i++) - out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); - out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); - for (int i = 0; i < 3; i++) - out[outf + 1] = (out[outf + 1] << 9) + in[inf + 3 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode61(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 2; i++) - out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); - out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 14) + in[inf + 3 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode62(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - for (int i = 0; i < 2; i++) - out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; - out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); - out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 28) + in[inf + 3 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode63(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - - out[outf + 0] = (out[outf + 0] << 14) + in[inf]; - out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); - for (int i = 0; i < 28; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 2 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode64(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - out[outf + 0] = (out[outf + 0] << 14) + in[inf]; - out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 
28); - for (int i = 0; i < 14; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 2 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode65(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - out[outf + 0] = (out[outf + 0] << 14) + in[inf]; - out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); - for (int i = 0; i < 9; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 2 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode66(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - out[outf + 0] = (out[outf + 0] << 14) + in[inf]; - out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); - for (int i = 0; i < 7; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 2 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode67(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - out[outf + 0] = (out[outf + 0] << 14) + in[inf]; - out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); - for (int i = 0; i < 5; i++) - out[outf + 1] = (out[outf + 1] << 5) + in[inf + 2 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode68(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - out[outf + 0] = (out[outf + 0] << 14) + in[inf]; - out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 7) + in[inf + 2 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode69(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - 
out[outf + 0] = (out[outf + 0] << 14) + in[inf]; - out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); - for (int i = 0; i < 3; i++) - out[outf + 1] = (out[outf + 1] << 9) + in[inf + 2 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode70(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - out[outf + 0] = (out[outf + 0] << 14) + in[inf]; - out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 14) + in[inf + 2 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode71(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - out[outf + 0] = (out[outf + 0] << 14) + in[inf]; - out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 28) + in[inf + 2 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode72(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - - out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); - for (int i = 0; i < 28; i++) - out[outf + 1] = (out[outf + 1] << 1) + in[inf + 1 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode73(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); - for (int i = 0; i < 14; i++) - out[outf + 1] = (out[outf + 1] << 2) + in[inf + 1 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode74(final int[] in, final 
int inf, final int code, final int[] out, - final int outf) { - out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); - for (int i = 0; i < 9; i++) - out[outf + 1] = (out[outf + 1] << 3) + in[inf + 1 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode75(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); - for (int i = 0; i < 7; i++) - out[outf + 1] = (out[outf + 1] << 4) + in[inf + 1 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode76(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); - for (int i = 0; i < 5; i++) - out[outf + 1] = (out[outf + 1] << 5) + in[inf + 1 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode77(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); - for (int i = 0; i < 4; i++) - out[outf + 1] = (out[outf + 1] << 7) + in[inf + 1 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode78(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); - for (int i = 0; i < 3; i++) - out[outf + 1] = (out[outf + 1] << 9) + in[inf + 1 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode79(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 
4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); - for (int i = 0; i < 2; i++) - out[outf + 1] = (out[outf + 1] << 14) + in[inf + 1 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - private void encode80(final int[] in, final int inf, final int code, final int[] out, - final int outf) { - out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); - out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); - for (int i = 0; i < 1; i++) - out[outf + 1] = (out[outf + 1] << 28) + in[inf + 1 + i]; - out[outf + 0] = code << 24 | out[outf + 0]; - - } - - @Override - public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { - if (inlength == 0) - return; - final int outlength = in[inpos.get()]; - inpos.increment(); - headlessUncompress(in, inpos, inlength, out, outpos, outlength); - } - - - - private void decode80(int val, int valn, int[] out, int currentPos) { - // number : 1, bitwidth : 28 - out[currentPos++] = (val << 8) >>> 4 | (valn >>> 28); - // number : 1, bitwidth : 28 - out[currentPos++] = (valn << 4) >>> 4; - } - - private void decode79(int val, int valn, int[] out, int currentPos) { - // number : 1, bitwidth : 28 - out[currentPos++] = (val << 8) >>> 4 | (valn >>> 28); - // number :2, bitwidth : 14 - out[currentPos++] = (valn << 4) >>> 18; - out[currentPos++] = (valn << 18) >>> 18; - } - - private void decode78(int val, int valn, int[] out, int currentPos) { - // number : 1, bitwidth : 28 - out[currentPos++] = (val << 8) >>> 4 | (valn >>> 27); - // number : 3, bitwidth :9 - out[currentPos++] = (valn << 5) >>> 23; - out[currentPos++] = (valn << 14) >>> 23; - out[currentPos++] = (valn << 23) >>> 23; - } - - private void decode77(int val, int valn, int[] out, int currentPos) { - // number : 1, bitwidth : 28 - out[currentPos++] = (val << 8) >>> 4 | (valn >>> 28); - // number : 4, bitwidth : 7 - out[currentPos++] = (valn << 4) >>> 25; - out[currentPos++] = (valn << 11) >>> 25; - 
out[currentPos++] = (valn << 18) >>> 25; - out[currentPos++] = (valn << 25) >>> 25; - } - - private void decode76(int val, int valn, int[] out, int currentPos) { - // number : 5, bitwidth : 5 - out[currentPos++] = (val << 8) >>> 4 | (valn >>> 25); - // number : 14, bitwidth : 2 - out[currentPos++] = (valn << 7) >>> 27; - out[currentPos++] = (valn << 12) >>> 27; - out[currentPos++] = (valn << 17) >>> 27; - out[currentPos++] = (valn << 22) >>> 27; - out[currentPos++] = (valn << 27) >>> 27; - } - - private void decode75(int val, int valn, int[] out, int currentPos) { - // number : 1, bitwidth : 28 - out[currentPos++] = (val << 8) >>> 4 | (valn >>> 28); - // number : 7, bitwidth : 4 - out[currentPos++] = (valn << 4) >>> 28; - out[currentPos++] = (valn << 8) >>> 28; - out[currentPos++] = (valn << 12) >>> 28; - out[currentPos++] = (valn << 16) >>> 28; - out[currentPos++] = (valn << 20) >>> 28; - out[currentPos++] = (valn << 24) >>> 28; - out[currentPos++] = (valn << 28) >>> 28; - } - - private void decode74(int val, int valn, int[] out, int currentPos) { - // number : 1, bitwidth : 28 - out[currentPos++] = (val << 8) >>> 4 | (valn >>> 27); - // number : 9, bitwidth : 3 - out[currentPos++] = (valn << 5) >>> 29; - out[currentPos++] = (valn << 8) >>> 29; - out[currentPos++] = (valn << 11) >>> 29; - out[currentPos++] = (valn << 14) >>> 29; - out[currentPos++] = (valn << 17) >>> 29; - out[currentPos++] = (valn << 20) >>> 29; - out[currentPos++] = (valn << 23) >>> 29; - out[currentPos++] = (valn << 26) >>> 29; - out[currentPos++] = (valn << 29) >>> 29; - } - - private void decode73(int val, int valn, int[] out, int currentPos) { - // number : 1, bitwidth : 28 - out[currentPos++] = (val << 8) >>> 4 | (valn >>> 28); - // number : 14, bitwidth : 2 - out[currentPos++] = (valn << 4) >>> 30; - out[currentPos++] = (valn << 6) >>> 30; - out[currentPos++] = (valn << 8) >>> 30; - out[currentPos++] = (valn << 10) >>> 30; - out[currentPos++] = (valn << 12) >>> 30; - out[currentPos++] = 
(valn << 14) >>> 30; - out[currentPos++] = (valn << 16) >>> 30; - out[currentPos++] = (valn << 18) >>> 30; - out[currentPos++] = (valn << 20) >>> 30; - out[currentPos++] = (valn << 22) >>> 30; // 10 - out[currentPos++] = (valn << 24) >>> 30; - out[currentPos++] = (valn << 26) >>> 30; - out[currentPos++] = (valn << 28) >>> 30; - out[currentPos++] = (valn << 30) >>> 30; - } - - private void decode72(int val, int valn, int[] out, int currentPos) { - // number : 1, bitwidth : 28 - out[currentPos++] = (val << 8) >>> 4 | (valn >>> 28); - // number : 28, bitwidth : 1 - out[currentPos++] = (valn << 4) >>> 31; - out[currentPos++] = (valn << 5) >>> 31; - out[currentPos++] = (valn << 6) >>> 31; - out[currentPos++] = (valn << 7) >>> 31; - out[currentPos++] = (valn << 8) >>> 31; - out[currentPos++] = (valn << 9) >>> 31; - out[currentPos++] = (valn << 10) >>> 31; - out[currentPos++] = (valn << 11) >>> 31; - out[currentPos++] = (valn << 12) >>> 31; - out[currentPos++] = (valn << 13) >>> 31; // 10 - out[currentPos++] = (valn << 14) >>> 31; - out[currentPos++] = (valn << 15) >>> 31; - out[currentPos++] = (valn << 16) >>> 31; - out[currentPos++] = (valn << 17) >>> 31; - out[currentPos++] = (valn << 18) >>> 31; - out[currentPos++] = (valn << 19) >>> 31; - out[currentPos++] = (valn << 20) >>> 31; - out[currentPos++] = (valn << 21) >>> 31; - out[currentPos++] = (valn << 22) >>> 31; - out[currentPos++] = (valn << 23) >>> 31; // 20 - out[currentPos++] = (valn << 24) >>> 31; - out[currentPos++] = (valn << 25) >>> 31; - out[currentPos++] = (valn << 26) >>> 31; - out[currentPos++] = (valn << 27) >>> 31; - out[currentPos++] = (valn << 28) >>> 31; - out[currentPos++] = (valn << 29) >>> 31; - out[currentPos++] = (valn << 30) >>> 31; - out[currentPos++] = (valn << 31) >>> 31; - } - - private void decode71(int val, int valn, int[] out, int currentPos) { - // number : 2, bitwidth : 14 - out[currentPos++] = (val << 8) >>> 18; - out[currentPos++] = (val << 22) >>> 18 | (valn >>> 28); - // number : 
1, bitwidth : 28 - out[currentPos++] = (valn << 4) >>> 4; - } - - private void decode70(int val, int valn, int[] out, int currentPos) { - // number : 2, bitwidth : 14 - out[currentPos++] = (val << 8) >>> 18; - out[currentPos++] = (val << 22) >>> 18 | (valn >>> 28); - // number : 2, bitwidth : 14 - out[currentPos++] = (valn << 4) >>> 18; - out[currentPos++] = (valn << 18) >>> 18; - } - - private void decode69(int val, int valn, int[] out, int currentPos) { - // number : 2, bitwidth : 14 - out[currentPos++] = (val << 8) >>> 18; - out[currentPos++] = (val << 22) >>> 18 | (valn >>> 27); - // number : 3, bitwidth : 9 - out[currentPos++] = (valn << 5) >>> 23; - out[currentPos++] = (valn << 14) >>> 23; - out[currentPos++] = (valn << 23) >>> 23; - } - - private void decode68(int val, int valn, int[] out, int currentPos) { - // number : 2, bitwidth : 14 - out[currentPos++] = (val << 8) >>> 18; - out[currentPos++] = (val << 22) >>> 18 | (valn >>> 28); - // number : 4, bitwidth : 7 - out[currentPos++] = (valn << 4) >>> 25; - out[currentPos++] = (valn << 11) >>> 25; - out[currentPos++] = (valn << 18) >>> 25; - out[currentPos++] = (valn << 25) >>> 25; - } - - private void decode67(int val, int valn, int[] out, int currentPos) { - // number : 2, bitwidth : 14 - out[currentPos++] = (val << 8) >>> 18; - out[currentPos++] = (val << 22) >>> 18 | (valn >>> 25); - // number : 5, bitwidth : 5 - out[currentPos++] = (valn << 7) >>> 27; - out[currentPos++] = (valn << 12) >>> 27; - out[currentPos++] = (valn << 17) >>> 27; - out[currentPos++] = (valn << 22) >>> 27; - out[currentPos++] = (valn << 27) >>> 27; - } - - private void decode66(int val, int valn, int[] out, int currentPos) { - // number : 2, bitwidth : 14 - out[currentPos++] = (val << 8) >>> 18; - out[currentPos++] = (val << 22) >>> 18 | (valn >>> 28); - // number : 7, bitwidth : 4 - out[currentPos++] = (valn << 4) >>> 28; - out[currentPos++] = (valn << 8) >>> 28; - out[currentPos++] = (valn << 12) >>> 28; - out[currentPos++] = 
(valn << 16) >>> 28; - out[currentPos++] = (valn << 20) >>> 28; - out[currentPos++] = (valn << 24) >>> 28; - out[currentPos++] = (valn << 28) >>> 28; - } - - private void decode65(int val, int valn, int[] out, int currentPos) { - // number : 2, bitwidth : 14 - out[currentPos++] = (val << 8) >>> 18; - out[currentPos++] = (val << 22) >>> 18 | (valn >>> 27); - // number : 9, bitwidth : 3 - out[currentPos++] = (valn << 5) >>> 29; - out[currentPos++] = (valn << 8) >>> 29; - out[currentPos++] = (valn << 11) >>> 29; - out[currentPos++] = (valn << 14) >>> 29; - out[currentPos++] = (valn << 17) >>> 29; - out[currentPos++] = (valn << 20) >>> 29; - out[currentPos++] = (valn << 23) >>> 29; - out[currentPos++] = (valn << 26) >>> 29; - out[currentPos++] = (valn << 29) >>> 29; - } - - private void decode64(int val, int valn, int[] out, int currentPos) { - // number : 2, bitwidth : 14 - out[currentPos++] = (val << 8) >>> 18; - out[currentPos++] = (val << 22) >>> 18 | (valn >>> 28); - // number : 14, bitwidth : 2 - out[currentPos++] = (valn << 4) >>> 30; - out[currentPos++] = (valn << 6) >>> 30; - out[currentPos++] = (valn << 8) >>> 30; - out[currentPos++] = (valn << 10) >>> 30; - out[currentPos++] = (valn << 12) >>> 30; - out[currentPos++] = (valn << 14) >>> 30; - out[currentPos++] = (valn << 16) >>> 30; - out[currentPos++] = (valn << 18) >>> 30; - out[currentPos++] = (valn << 20) >>> 30; - out[currentPos++] = (valn << 22) >>> 30; // 10 - out[currentPos++] = (valn << 24) >>> 30; - out[currentPos++] = (valn << 26) >>> 30; - out[currentPos++] = (valn << 28) >>> 30; - out[currentPos++] = (valn << 30) >>> 30; - } - - private void decode63(int val, int valn, int[] out, int currentPos) { - // number : 2, bitwidth : 14 - out[currentPos++] = (val << 8) >>> 18; - out[currentPos++] = (val << 22) >>> 18 | (valn >>> 28); - // number : 28, bitwidth : 1 - out[currentPos++] = (valn << 4) >>> 31; - out[currentPos++] = (valn << 5) >>> 31; - out[currentPos++] = (valn << 6) >>> 31; - 
out[currentPos++] = (valn << 7) >>> 31; - out[currentPos++] = (valn << 8) >>> 31; - out[currentPos++] = (valn << 9) >>> 31; - out[currentPos++] = (valn << 10) >>> 31; - out[currentPos++] = (valn << 11) >>> 31; - out[currentPos++] = (valn << 12) >>> 31; - out[currentPos++] = (valn << 13) >>> 31; // 10 - out[currentPos++] = (valn << 14) >>> 31; - out[currentPos++] = (valn << 15) >>> 31; - out[currentPos++] = (valn << 16) >>> 31; - out[currentPos++] = (valn << 17) >>> 31; - out[currentPos++] = (valn << 18) >>> 31; - out[currentPos++] = (valn << 19) >>> 31; - out[currentPos++] = (valn << 20) >>> 31; - out[currentPos++] = (valn << 21) >>> 31; - out[currentPos++] = (valn << 22) >>> 31; - out[currentPos++] = (valn << 23) >>> 31; // 20 - out[currentPos++] = (valn << 24) >>> 31; - out[currentPos++] = (valn << 25) >>> 31; - out[currentPos++] = (valn << 26) >>> 31; - out[currentPos++] = (valn << 27) >>> 31; - out[currentPos++] = (valn << 28) >>> 31; - out[currentPos++] = (valn << 29) >>> 31; - out[currentPos++] = (valn << 30) >>> 31; - out[currentPos++] = (valn << 31) >>> 31; - } - - private void decode62(int val, int valn, int[] out, int currentPos) { - // number : 3, bitwidth : 9 - out[currentPos++] = (val << 8) >>> 23; - out[currentPos++] = (val << 17) >>> 23; - out[currentPos++] = (val << 26) >>> 23 | (valn >>> 28); - // number : 1, bitwidth : 28 - out[currentPos++] = (valn << 4) >>> 4; - } - - private void decode61(int val, int valn, int[] out, int currentPos) { - // number : 3, bitwidth : 9 - out[currentPos++] = (val << 8) >>> 23; - out[currentPos++] = (val << 17) >>> 23; - out[currentPos++] = (val << 26) >>> 23 | (valn >>> 28); - // number : 2, bitwidth : 14 - out[currentPos++] = (valn << 4) >>> 18; - out[currentPos++] = (valn << 18) >>> 18; - } - - private void decode60(int val, int valn, int[] out, int currentPos) { - // number : 3, bitwidth : 9 - out[currentPos++] = (val << 8) >>> 23; - out[currentPos++] = (val << 17) >>> 23; - out[currentPos++] = (val << 26) >>> 23 
| (valn >>> 27); - // number : 3, bitwidth : 9 - out[currentPos++] = (valn << 5) >>> 23; - out[currentPos++] = (valn << 14) >>> 23; - out[currentPos++] = (valn << 23) >>> 23; - } - - private void decode59(int val, int valn, int[] out, int currentPos) { - // number : 3, bitwidth : 9 - out[currentPos++] = (val << 8) >>> 23; - out[currentPos++] = (val << 17) >>> 23; - out[currentPos++] = (val << 26) >>> 23 | (valn >>> 28); - // number : 4, bitwidth : 7 - out[currentPos++] = (valn << 4) >>> 25; - out[currentPos++] = (valn << 11) >>> 25; - out[currentPos++] = (valn << 18) >>> 25; - out[currentPos++] = (valn << 25) >>> 25; - } - - private void decode58(int val, int valn, int[] out, int currentPos) { - // number : 3, bitwidth : 9 - out[currentPos++] = (val << 8) >>> 23; - out[currentPos++] = (val << 17) >>> 23; - out[currentPos++] = (val << 26) >>> 23 | (valn >>> 25); - // number : 5, bitwidth : 5 - out[currentPos++] = (valn << 7) >>> 27; - out[currentPos++] = (valn << 12) >>> 27; - out[currentPos++] = (valn << 17) >>> 27; - out[currentPos++] = (valn << 22) >>> 27; - out[currentPos++] = (valn << 27) >>> 27; - } - - private void decode57(int val, int valn, int[] out, int currentPos) { - // number : 3, bitwidth : 9 - out[currentPos++] = (val << 8) >>> 23; - out[currentPos++] = (val << 17) >>> 23; - out[currentPos++] = (val << 26) >>> 23 | (valn >>> 28); - // number : 7, bitwidth : 4 - out[currentPos++] = (valn << 4) >>> 28; - out[currentPos++] = (valn << 8) >>> 28; - out[currentPos++] = (valn << 12) >>> 28; - out[currentPos++] = (valn << 16) >>> 28; - out[currentPos++] = (valn << 20) >>> 28; - out[currentPos++] = (valn << 24) >>> 28; - out[currentPos++] = (valn << 28) >>> 28; - } - - private void decode56(int val, int valn, int[] out, int currentPos) { - // number : 3, bitwidth : 9 - out[currentPos++] = (val << 8) >>> 23; - out[currentPos++] = (val << 17) >>> 23; - out[currentPos++] = (val << 26) >>> 23 | (valn >>> 27); - // number : 9, bitwidth : 3 - out[currentPos++] = 
(valn << 5) >>> 29; - out[currentPos++] = (valn << 8) >>> 29; - out[currentPos++] = (valn << 11) >>> 29; - out[currentPos++] = (valn << 14) >>> 29; - out[currentPos++] = (valn << 17) >>> 29; - out[currentPos++] = (valn << 20) >>> 29; - out[currentPos++] = (valn << 23) >>> 29; - out[currentPos++] = (valn << 26) >>> 29; - out[currentPos++] = (valn << 29) >>> 29; - } - - private void decode55(int val, int valn, int[] out, int currentPos) { - // number : 3, bitwidth : 9 - out[currentPos++] = (val << 8) >>> 23; - out[currentPos++] = (val << 17) >>> 23; - out[currentPos++] = (val << 26) >>> 23 | (valn >>> 28); - // number : 14, bitwidth : 2 - out[currentPos++] = (valn << 4) >>> 30; - out[currentPos++] = (valn << 6) >>> 30; - out[currentPos++] = (valn << 8) >>> 30; - out[currentPos++] = (valn << 10) >>> 30; - out[currentPos++] = (valn << 12) >>> 30; - out[currentPos++] = (valn << 14) >>> 30; - out[currentPos++] = (valn << 16) >>> 30; - out[currentPos++] = (valn << 18) >>> 30; - out[currentPos++] = (valn << 20) >>> 30; - out[currentPos++] = (valn << 22) >>> 30; // 10 - out[currentPos++] = (valn << 24) >>> 30; - out[currentPos++] = (valn << 26) >>> 30; - out[currentPos++] = (valn << 28) >>> 30; - out[currentPos++] = (valn << 30) >>> 30; - } - - private void decode54(int val, int valn, int[] out, int currentPos) { - // number : 3, bitwidth : 9 - out[currentPos++] = (val << 8) >>> 23; - out[currentPos++] = (val << 17) >>> 23; - out[currentPos++] = (val << 26) >>> 23 | (valn >>> 28); - // number : 28, bitwidth : 1 - out[currentPos++] = (valn << 4) >>> 31; - out[currentPos++] = (valn << 5) >>> 31; - out[currentPos++] = (valn << 6) >>> 31; - out[currentPos++] = (valn << 7) >>> 31; - out[currentPos++] = (valn << 8) >>> 31; - out[currentPos++] = (valn << 9) >>> 31; - out[currentPos++] = (valn << 10) >>> 31; - out[currentPos++] = (valn << 11) >>> 31; - out[currentPos++] = (valn << 12) >>> 31; - out[currentPos++] = (valn << 13) >>> 31; // 10 - out[currentPos++] = (valn << 14) >>> 
31; - out[currentPos++] = (valn << 15) >>> 31; - out[currentPos++] = (valn << 16) >>> 31; - out[currentPos++] = (valn << 17) >>> 31; - out[currentPos++] = (valn << 18) >>> 31; - out[currentPos++] = (valn << 19) >>> 31; - out[currentPos++] = (valn << 20) >>> 31; - out[currentPos++] = (valn << 21) >>> 31; - out[currentPos++] = (valn << 22) >>> 31; - out[currentPos++] = (valn << 23) >>> 31; // 20 - out[currentPos++] = (valn << 24) >>> 31; - out[currentPos++] = (valn << 25) >>> 31; - out[currentPos++] = (valn << 26) >>> 31; - out[currentPos++] = (valn << 27) >>> 31; - out[currentPos++] = (valn << 28) >>> 31; - out[currentPos++] = (valn << 29) >>> 31; - out[currentPos++] = (valn << 30) >>> 31; - out[currentPos++] = (valn << 31) >>> 31; - } - - private void decode53(int val, int valn, int[] out, int currentPos) { - // number : 4, bitwidth : 7 - out[currentPos++] = (val << 8) >>> 25; - out[currentPos++] = (val << 15) >>> 25; - out[currentPos++] = (val << 22) >>> 25; - out[currentPos++] = (val << 29) >>> 25 | (valn >>> 28); - // number : 1, bitwidth : 28 - out[currentPos++] = (valn << 4) >>> 4; - } - - private void decode52(int val, int valn, int[] out, int currentPos) { - // number : 4, bitwidth : 7 - out[currentPos++] = (val << 8) >>> 25; - out[currentPos++] = (val << 15) >>> 25; - out[currentPos++] = (val << 22) >>> 25; - out[currentPos++] = (val << 29) >>> 25 | (valn >>> 28); - // number : 2, bitwidth : 14 - out[currentPos++] = (valn << 4) >>> 18; - out[currentPos++] = (valn << 18) >>> 18; - } - - private void decode51(int val, int valn, int[] out, int currentPos) { - // number : 4, bitwidth : 7 - out[currentPos++] = (val << 8) >>> 25; - out[currentPos++] = (val << 15) >>> 25; - out[currentPos++] = (val << 22) >>> 25; - out[currentPos++] = (val << 29) >>> 25 | (valn >>> 27); - // number : 3, bitwidth : 9 - out[currentPos++] = (valn << 5) >>> 23; - out[currentPos++] = (valn << 14) >>> 23; - out[currentPos++] = (valn << 23) >>> 23; - } - - private void decode50(int val, 
int valn, int[] out, int currentPos) { - // number : 4, bitwidth : 7 - out[currentPos++] = (val << 8) >>> 25; - out[currentPos++] = (val << 15) >>> 25; - out[currentPos++] = (val << 22) >>> 25; - out[currentPos++] = (val << 29) >>> 25 | (valn >>> 28); - // number : 4, bitwidth : 7 - out[currentPos++] = (valn << 4) >>> 25; - out[currentPos++] = (valn << 11) >>> 25; - out[currentPos++] = (valn << 18) >>> 25; - out[currentPos++] = (valn << 25) >>> 25; - } - - private void decode49(int val, int valn, int[] out, int currentPos) { - // number : 4, bitwidth : 7 - out[currentPos++] = (val << 8) >>> 25; - out[currentPos++] = (val << 15) >>> 25; - out[currentPos++] = (val << 22) >>> 25; - out[currentPos++] = (val << 29) >>> 25 | (valn >>> 25); - // number : 5, bitwidth : 5 - out[currentPos++] = (valn << 7) >>> 27; - out[currentPos++] = (valn << 12) >>> 27; - out[currentPos++] = (valn << 17) >>> 27; - out[currentPos++] = (valn << 22) >>> 27; - out[currentPos++] = (valn << 27) >>> 27; - } - - private void decode48(int val, int valn, int[] out, int currentPos) { - // number : 4, bitwidth : 7 - out[currentPos++] = (val << 8) >>> 25; - out[currentPos++] = (val << 15) >>> 25; - out[currentPos++] = (val << 22) >>> 25; - out[currentPos++] = (val << 29) >>> 25 | (valn >>> 28); - // number : 7, bitwidth : 4 - out[currentPos++] = (valn << 4) >>> 28; - out[currentPos++] = (valn << 8) >>> 28; - out[currentPos++] = (valn << 12) >>> 28; - out[currentPos++] = (valn << 16) >>> 28; - out[currentPos++] = (valn << 20) >>> 28; - out[currentPos++] = (valn << 24) >>> 28; - out[currentPos++] = (valn << 28) >>> 28; - } - - private void decode47(int val, int valn, int[] out, int currentPos) { - // number : 4, bitwidth : 7 - out[currentPos++] = (val << 8) >>> 25; - out[currentPos++] = (val << 15) >>> 25; - out[currentPos++] = (val << 22) >>> 25; - out[currentPos++] = (val << 29) >>> 25 | (valn >>> 27); - // number : 9, bitwidth : 3 - out[currentPos++] = (valn << 5) >>> 29; - out[currentPos++] = (valn 
<< 8) >>> 29; - out[currentPos++] = (valn << 11) >>> 29; - out[currentPos++] = (valn << 14) >>> 29; - out[currentPos++] = (valn << 17) >>> 29; - out[currentPos++] = (valn << 20) >>> 29; - out[currentPos++] = (valn << 23) >>> 29; - out[currentPos++] = (valn << 26) >>> 29; - out[currentPos++] = (valn << 29) >>> 29; - } - - private void decode46(int val, int valn, int[] out, int currentPos) { - // number : 4, bitwidth : 7 - out[currentPos++] = (val << 8) >>> 25; - out[currentPos++] = (val << 15) >>> 25; - out[currentPos++] = (val << 22) >>> 25; - out[currentPos++] = (val << 29) >>> 25 | (valn >>> 28); - // number : 14, bitwidth : 2 - out[currentPos++] = (valn << 4) >>> 30; - out[currentPos++] = (valn << 6) >>> 30; - out[currentPos++] = (valn << 8) >>> 30; - out[currentPos++] = (valn << 10) >>> 30; - out[currentPos++] = (valn << 12) >>> 30; - out[currentPos++] = (valn << 14) >>> 30; - out[currentPos++] = (valn << 16) >>> 30; - out[currentPos++] = (valn << 18) >>> 30; - out[currentPos++] = (valn << 20) >>> 30; - out[currentPos++] = (valn << 22) >>> 30; // 10 - out[currentPos++] = (valn << 24) >>> 30; - out[currentPos++] = (valn << 26) >>> 30; - out[currentPos++] = (valn << 28) >>> 30; - out[currentPos++] = (valn << 30) >>> 30; - } - - private void decode45(int val, int valn, int[] out, int currentPos) { - // number : 4, bitwidth : 7 - out[currentPos++] = (val << 8) >>> 25; - out[currentPos++] = (val << 15) >>> 25; - out[currentPos++] = (val << 22) >>> 25; - out[currentPos++] = (val << 29) >>> 25 | (valn >>> 28); - // number : 28, bitwidth : 1 - out[currentPos++] = (valn << 4) >>> 31; - out[currentPos++] = (valn << 5) >>> 31; - out[currentPos++] = (valn << 6) >>> 31; - out[currentPos++] = (valn << 7) >>> 31; - out[currentPos++] = (valn << 8) >>> 31; - out[currentPos++] = (valn << 9) >>> 31; - out[currentPos++] = (valn << 10) >>> 31; - out[currentPos++] = (valn << 11) >>> 31; - out[currentPos++] = (valn << 12) >>> 31; - out[currentPos++] = (valn << 13) >>> 31; // 10 - 
out[currentPos++] = (valn << 14) >>> 31; - out[currentPos++] = (valn << 15) >>> 31; - out[currentPos++] = (valn << 16) >>> 31; - out[currentPos++] = (valn << 17) >>> 31; - out[currentPos++] = (valn << 18) >>> 31; - out[currentPos++] = (valn << 19) >>> 31; - out[currentPos++] = (valn << 20) >>> 31; - out[currentPos++] = (valn << 21) >>> 31; - out[currentPos++] = (valn << 22) >>> 31; - out[currentPos++] = (valn << 23) >>> 31; // 20 - out[currentPos++] = (valn << 24) >>> 31; - out[currentPos++] = (valn << 25) >>> 31; - out[currentPos++] = (valn << 26) >>> 31; - out[currentPos++] = (valn << 27) >>> 31; - out[currentPos++] = (valn << 28) >>> 31; - out[currentPos++] = (valn << 29) >>> 31; - out[currentPos++] = (valn << 30) >>> 31; - out[currentPos++] = (valn << 31) >>> 31; - } - - private void decode44(int val, int valn, int[] out, int currentPos) { - // number : 5, bitwidth : 5 - out[currentPos++] = (val << 8) >>> 27; - out[currentPos++] = (val << 13) >>> 27; - out[currentPos++] = (val << 18) >>> 27; - out[currentPos++] = (val << 23) >>> 27; - out[currentPos++] = (val << 28) >>> 27 | (valn >>> 28); - // number : 1, bitwidth : 28 - out[currentPos++] = (valn << 4) >>> 4; - } - - private void decode43(int val, int valn, int[] out, int currentPos) { - // number : 5, bitwidth : 5 - out[currentPos++] = (val << 8) >>> 27; - out[currentPos++] = (val << 13) >>> 27; - out[currentPos++] = (val << 18) >>> 27; - out[currentPos++] = (val << 23) >>> 27; - out[currentPos++] = (val << 28) >>> 27 | (valn >>> 28); - // number : 2, bitwidth : 14 - out[currentPos++] = (valn << 4) >>> 18; - out[currentPos++] = (valn << 18) >>> 18; - } - - private void decode42(int val, int valn, int[] out, int currentPos) { - // number : 5, bitwidth : 5 - out[currentPos++] = (val << 8) >>> 27; - out[currentPos++] = (val << 13) >>> 27; - out[currentPos++] = (val << 18) >>> 27; - out[currentPos++] = (val << 23) >>> 27; - out[currentPos++] = (val << 28) >>> 27 | (valn >>> 27); - // number : 3, bitwidth : 9 - 
out[currentPos++] = (valn << 5) >>> 23; - out[currentPos++] = (valn << 14) >>> 23; - out[currentPos++] = (valn << 23) >>> 23; - } - - private void decode41(int val, int valn, int[] out, int currentPos) { - // number : 5, bitwidth : 5 - out[currentPos++] = (val << 8) >>> 27; - out[currentPos++] = (val << 13) >>> 27; - out[currentPos++] = (val << 18) >>> 27; - out[currentPos++] = (val << 23) >>> 27; - out[currentPos++] = (val << 28) >>> 27 | (valn >>> 28); - // number : 4, bitwidth : 7 - out[currentPos++] = (valn << 4) >>> 25; - out[currentPos++] = (valn << 11) >>> 25; - out[currentPos++] = (valn << 18) >>> 25; - out[currentPos++] = (valn << 25) >>> 25; - } - - private void decode40(int val, int valn, int[] out, int currentPos) { - // number : 5, bitwidth : 5 - out[currentPos++] = (val << 8) >>> 27; - out[currentPos++] = (val << 13) >>> 27; - out[currentPos++] = (val << 18) >>> 27; - out[currentPos++] = (val << 23) >>> 27; - out[currentPos++] = (val << 28) >>> 27 | (valn >>> 25); - // number : 5, bitwidth : 5 - out[currentPos++] = (valn << 7) >>> 27; - out[currentPos++] = (valn << 12) >>> 27; - out[currentPos++] = (valn << 17) >>> 27; - out[currentPos++] = (valn << 22) >>> 27; - out[currentPos++] = (valn << 27) >>> 27; - } - - private void decode39(int val, int valn, int[] out, int currentPos) { - // number : 5, bitwidth : 5 - out[currentPos++] = (val << 8) >>> 27; - out[currentPos++] = (val << 13) >>> 27; - out[currentPos++] = (val << 18) >>> 27; - out[currentPos++] = (val << 23) >>> 27; - out[currentPos++] = (val << 28) >>> 27 | (valn >>> 28); - // number : 7, bitwidth : 4 - out[currentPos++] = (valn << 4) >>> 28; - out[currentPos++] = (valn << 8) >>> 28; - out[currentPos++] = (valn << 12) >>> 28; - out[currentPos++] = (valn << 16) >>> 28; - out[currentPos++] = (valn << 20) >>> 28; - out[currentPos++] = (valn << 24) >>> 28; - out[currentPos++] = (valn << 28) >>> 28; - } - - private void decode38(int val, int valn, int[] out, int currentPos) { - // number : 5, 
bitwidth : 5 - out[currentPos++] = (val << 8) >>> 27; - out[currentPos++] = (val << 13) >>> 27; - out[currentPos++] = (val << 18) >>> 27; - out[currentPos++] = (val << 23) >>> 27; - out[currentPos++] = (val << 28) >>> 27 | (valn >>> 27); - // number : 9, bitwidth : 3 - out[currentPos++] = (valn << 5) >>> 29; - out[currentPos++] = (valn << 8) >>> 29; - out[currentPos++] = (valn << 11) >>> 29; - out[currentPos++] = (valn << 14) >>> 29; - out[currentPos++] = (valn << 17) >>> 29; - out[currentPos++] = (valn << 20) >>> 29; - out[currentPos++] = (valn << 23) >>> 29; - out[currentPos++] = (valn << 26) >>> 29; - out[currentPos++] = (valn << 29) >>> 29; - } - - private void decode37(int val, int valn, int[] out, int currentPos) { - // number : 5, bitwidth : 5 - out[currentPos++] = (val << 8) >>> 27; - out[currentPos++] = (val << 13) >>> 27; - out[currentPos++] = (val << 18) >>> 27; - out[currentPos++] = (val << 23) >>> 27; - out[currentPos++] = (val << 28) >>> 27 | (valn >>> 28); - // number : 14, bitwidth : 2 - out[currentPos++] = (valn << 4) >>> 30; - out[currentPos++] = (valn << 6) >>> 30; - out[currentPos++] = (valn << 8) >>> 30; - out[currentPos++] = (valn << 10) >>> 30; - out[currentPos++] = (valn << 12) >>> 30; - out[currentPos++] = (valn << 14) >>> 30; - out[currentPos++] = (valn << 16) >>> 30; - out[currentPos++] = (valn << 18) >>> 30; - out[currentPos++] = (valn << 20) >>> 30; - out[currentPos++] = (valn << 22) >>> 30; // 10 - out[currentPos++] = (valn << 24) >>> 30; - out[currentPos++] = (valn << 26) >>> 30; - out[currentPos++] = (valn << 28) >>> 30; - out[currentPos++] = (valn << 30) >>> 30; - } - - private void decode36(int val, int valn, int[] out, int currentPos) { - // number : 5, bitwidth : 5 - out[currentPos++] = (val << 8) >>> 27; - out[currentPos++] = (val << 13) >>> 27; - out[currentPos++] = (val << 18) >>> 27; - out[currentPos++] = (val << 23) >>> 27; - out[currentPos++] = (val << 28) >>> 27 | (valn >>> 28); - // number : 28, bitwidth : 1 - 
out[currentPos++] = (valn << 4) >>> 31; - out[currentPos++] = (valn << 5) >>> 31; - out[currentPos++] = (valn << 6) >>> 31; - out[currentPos++] = (valn << 7) >>> 31; - out[currentPos++] = (valn << 8) >>> 31; - out[currentPos++] = (valn << 9) >>> 31; - out[currentPos++] = (valn << 10) >>> 31; - out[currentPos++] = (valn << 11) >>> 31; - out[currentPos++] = (valn << 12) >>> 31; - out[currentPos++] = (valn << 13) >>> 31; // 10 - out[currentPos++] = (valn << 14) >>> 31; - out[currentPos++] = (valn << 15) >>> 31; - out[currentPos++] = (valn << 16) >>> 31; - out[currentPos++] = (valn << 17) >>> 31; - out[currentPos++] = (valn << 18) >>> 31; - out[currentPos++] = (valn << 19) >>> 31; - out[currentPos++] = (valn << 20) >>> 31; - out[currentPos++] = (valn << 21) >>> 31; - out[currentPos++] = (valn << 22) >>> 31; - out[currentPos++] = (valn << 23) >>> 31; // 20 - out[currentPos++] = (valn << 24) >>> 31; - out[currentPos++] = (valn << 25) >>> 31; - out[currentPos++] = (valn << 26) >>> 31; - out[currentPos++] = (valn << 27) >>> 31; - out[currentPos++] = (valn << 28) >>> 31; - out[currentPos++] = (valn << 29) >>> 31; - out[currentPos++] = (valn << 30) >>> 31; - out[currentPos++] = (valn << 31) >>> 31; - } - - private void decode35(int val, int valn, int[] out, int currentPos) { - // number : 7, bitwidth : 4 - out[currentPos++] = (val << 8) >>> 28; - out[currentPos++] = (val << 12) >>> 28; - out[currentPos++] = (val << 16) >>> 28; - out[currentPos++] = (val << 20) >>> 28; - out[currentPos++] = (val << 24) >>> 28; - out[currentPos++] = (val << 28) >>> 28; - out[currentPos++] = (valn << 0) >>> 28; - // number : 1, bitwidth : 28 - out[currentPos++] = (valn << 4) >>> 4; - } - - private void decode34(int val, int valn, int[] out, int currentPos) { - // number : 7, bitwidth : 4 - out[currentPos++] = (val << 8) >>> 28; - out[currentPos++] = (val << 12) >>> 28; - out[currentPos++] = (val << 16) >>> 28; - out[currentPos++] = (val << 20) >>> 28; - out[currentPos++] = (val << 24) >>> 28; - 
out[currentPos++] = (val << 28) >>> 28; - out[currentPos++] = (valn << 0) >>> 28; - // number : 2, bitwidth : 14 - out[currentPos++] = (valn << 4) >>> 18; - out[currentPos++] = (valn << 18) >>> 18; - } - - private void decode33(int val, int valn, int[] out, int currentPos) { - // number : 7, bitwidth : 4 - out[currentPos++] = (val << 8) >>> 28; - out[currentPos++] = (val << 12) >>> 28; - out[currentPos++] = (val << 16) >>> 28; - out[currentPos++] = (val << 20) >>> 28; - out[currentPos++] = (val << 24) >>> 28; - out[currentPos++] = (val << 28) >>> 28; - out[currentPos++] = (valn << 1) >>> 28; - // number : 3, bitwidth : 9 - out[currentPos++] = (valn << 5) >>> 23; - out[currentPos++] = (valn << 14) >>> 23; - out[currentPos++] = (valn << 23) >>> 23; - } - - private void decode32(int val, int valn, int[] out, int currentPos) { - // number : 7, bitwidth : 4 - out[currentPos++] = (val << 8) >>> 28; - out[currentPos++] = (val << 12) >>> 28; - out[currentPos++] = (val << 16) >>> 28; - out[currentPos++] = (val << 20) >>> 28; - out[currentPos++] = (val << 24) >>> 28; - out[currentPos++] = (val << 28) >>> 28; - out[currentPos++] = (valn << 0) >>> 28; - // number : 4, bitwidth : 7 - out[currentPos++] = (valn << 4) >>> 25; - out[currentPos++] = (valn << 11) >>> 25; - out[currentPos++] = (valn << 18) >>> 25; - out[currentPos++] = (valn << 25) >>> 25; - } - - private void decode31(int val, int valn, int[] out, int currentPos) { - // number : 7, bitwidth : 4 - out[currentPos++] = (val << 8) >>> 28; - out[currentPos++] = (val << 12) >>> 28; - out[currentPos++] = (val << 16) >>> 28; - out[currentPos++] = (val << 20) >>> 28; - out[currentPos++] = (val << 24) >>> 28; - out[currentPos++] = (val << 28) >>> 28; - out[currentPos++] = (valn << 3) >>> 28; - // number : 5, bitwidth : 5 - out[currentPos++] = (valn << 7) >>> 27; - out[currentPos++] = (valn << 12) >>> 27; - out[currentPos++] = (valn << 17) >>> 27; - out[currentPos++] = (valn << 22) >>> 27; - out[currentPos++] = (valn << 27) >>> 
27; - } - - private void decode30(int val, int valn, int[] out, int currentPos) { - // number : 7, bitwidth : 4 - out[currentPos++] = (val << 8) >>> 28; - out[currentPos++] = (val << 12) >>> 28; - out[currentPos++] = (val << 16) >>> 28; - out[currentPos++] = (val << 20) >>> 28; - out[currentPos++] = (val << 24) >>> 28; - out[currentPos++] = (val << 28) >>> 28; - out[currentPos++] = (valn << 0) >>> 28; - // number : 7, bitwidth : 4 - out[currentPos++] = (valn << 4) >>> 28; - out[currentPos++] = (valn << 8) >>> 28; - out[currentPos++] = (valn << 12) >>> 28; - out[currentPos++] = (valn << 16) >>> 28; - out[currentPos++] = (valn << 20) >>> 28; - out[currentPos++] = (valn << 24) >>> 28; - out[currentPos++] = (valn << 28) >>> 28; - } - - private void decode29(int val, int valn, int[] out, int currentPos) { - // number : 7, bitwidth : 4 - out[currentPos++] = (val << 8) >>> 28; - out[currentPos++] = (val << 12) >>> 28; - out[currentPos++] = (val << 16) >>> 28; - out[currentPos++] = (val << 20) >>> 28; - out[currentPos++] = (val << 24) >>> 28; - out[currentPos++] = (val << 28) >>> 28; - out[currentPos++] = (valn << 1) >>> 28; - // number : 9, bitwidth : 3 - out[currentPos++] = (valn << 5) >>> 29; - out[currentPos++] = (valn << 8) >>> 29; - out[currentPos++] = (valn << 11) >>> 29; - out[currentPos++] = (valn << 14) >>> 29; - out[currentPos++] = (valn << 17) >>> 29; - out[currentPos++] = (valn << 20) >>> 29; - out[currentPos++] = (valn << 23) >>> 29; - out[currentPos++] = (valn << 26) >>> 29; - out[currentPos++] = (valn << 29) >>> 29; - } - - private void decode28(int val, int valn, int[] out, int currentPos) { - // number : 7, bitwidth : 4 - out[currentPos++] = (val << 8) >>> 28; - out[currentPos++] = (val << 12) >>> 28; - out[currentPos++] = (val << 16) >>> 28; - out[currentPos++] = (val << 20) >>> 28; - out[currentPos++] = (val << 24) >>> 28; - out[currentPos++] = (val << 28) >>> 28; - out[currentPos++] = (valn << 0) >>> 28; - // number : 14, bitwidth : 2 - 
out[currentPos++] = (valn << 4) >>> 30; - out[currentPos++] = (valn << 6) >>> 30; - out[currentPos++] = (valn << 8) >>> 30; - out[currentPos++] = (valn << 10) >>> 30; - out[currentPos++] = (valn << 12) >>> 30; - out[currentPos++] = (valn << 14) >>> 30; - out[currentPos++] = (valn << 16) >>> 30; - out[currentPos++] = (valn << 18) >>> 30; - out[currentPos++] = (valn << 20) >>> 30; - out[currentPos++] = (valn << 22) >>> 30; // 10 - out[currentPos++] = (valn << 24) >>> 30; - out[currentPos++] = (valn << 26) >>> 30; - out[currentPos++] = (valn << 28) >>> 30; - out[currentPos++] = (valn << 30) >>> 30; - } - - private void decode27(int val, int valn, int[] out, int currentPos) { - // number : 7, bitwidth : 4 - out[currentPos++] = (val << 8) >>> 28; - out[currentPos++] = (val << 12) >>> 28; - out[currentPos++] = (val << 16) >>> 28; - out[currentPos++] = (val << 20) >>> 28; - out[currentPos++] = (val << 24) >>> 28; - out[currentPos++] = (val << 28) >>> 28; - out[currentPos++] = (valn << 0) >>> 28; - // number : 28, bitwidth : 1 - out[currentPos++] = (valn << 4) >>> 31; - out[currentPos++] = (valn << 5) >>> 31; - out[currentPos++] = (valn << 6) >>> 31; - out[currentPos++] = (valn << 7) >>> 31; - out[currentPos++] = (valn << 8) >>> 31; - out[currentPos++] = (valn << 9) >>> 31; - out[currentPos++] = (valn << 10) >>> 31; - out[currentPos++] = (valn << 11) >>> 31; - out[currentPos++] = (valn << 12) >>> 31; - out[currentPos++] = (valn << 13) >>> 31; // 10 - out[currentPos++] = (valn << 14) >>> 31; - out[currentPos++] = (valn << 15) >>> 31; - out[currentPos++] = (valn << 16) >>> 31; - out[currentPos++] = (valn << 17) >>> 31; - out[currentPos++] = (valn << 18) >>> 31; - out[currentPos++] = (valn << 19) >>> 31; - out[currentPos++] = (valn << 20) >>> 31; - out[currentPos++] = (valn << 21) >>> 31; - out[currentPos++] = (valn << 22) >>> 31; - out[currentPos++] = (valn << 23) >>> 31; // 20 - out[currentPos++] = (valn << 24) >>> 31; - out[currentPos++] = (valn << 25) >>> 31; - 
out[currentPos++] = (valn << 26) >>> 31; - out[currentPos++] = (valn << 27) >>> 31; - out[currentPos++] = (valn << 28) >>> 31; - out[currentPos++] = (valn << 29) >>> 31; - out[currentPos++] = (valn << 30) >>> 31; - out[currentPos++] = (valn << 31) >>> 31; - } - - private void decode26(int val, int valn, int[] out, int currentPos) { - // number : 9, bitwidth : 3 - out[currentPos++] = (val << 8) >>> 29; - out[currentPos++] = (val << 11) >>> 29; - out[currentPos++] = (val << 14) >>> 29; - out[currentPos++] = (val << 17) >>> 29; - out[currentPos++] = (val << 20) >>> 29; - out[currentPos++] = (val << 23) >>> 29; - out[currentPos++] = (val << 26) >>> 29; - out[currentPos++] = (val << 29) >>> 29; - out[currentPos++] = (valn << 1) >>> 29; - // number : 1, bitwidth : 28 - out[currentPos++] = (valn << 4) >>> 4; - } - - private void decode25(int val, int valn, int[] out, int currentPos) { - // number : 9, bitwidth : 3 - out[currentPos++] = (val << 8) >>> 29; - out[currentPos++] = (val << 11) >>> 29; - out[currentPos++] = (val << 14) >>> 29; - out[currentPos++] = (val << 17) >>> 29; - out[currentPos++] = (val << 20) >>> 29; - out[currentPos++] = (val << 23) >>> 29; - out[currentPos++] = (val << 26) >>> 29; - out[currentPos++] = (val << 29) >>> 29; - out[currentPos++] = (valn << 1) >>> 29; - // number : 2, bitwidth : 14 - out[currentPos++] = (valn << 4) >>> 18; - out[currentPos++] = (valn << 18) >>> 18; - } - - private void decode24(int val, int valn, int[] out, int currentPos) { - // number : 9, bitwidth : 3 - out[currentPos++] = (val << 8) >>> 29; - out[currentPos++] = (val << 11) >>> 29; - out[currentPos++] = (val << 14) >>> 29; - out[currentPos++] = (val << 17) >>> 29; - out[currentPos++] = (val << 20) >>> 29; - out[currentPos++] = (val << 23) >>> 29; - out[currentPos++] = (val << 26) >>> 29; - out[currentPos++] = (val << 29) >>> 29; - out[currentPos++] = (valn << 2) >>> 29; - // number : 3, bitwidth : 9 - out[currentPos++] = (valn << 5) >>> 23; - out[currentPos++] = (valn 
<< 14) >>> 23; - out[currentPos++] = (valn << 23) >>> 23; - } - - private void decode23(int val, int valn, int[] out, int currentPos) { - // number : 9, bitwidth : 3 - out[currentPos++] = (val << 8) >>> 29; - out[currentPos++] = (val << 11) >>> 29; - out[currentPos++] = (val << 14) >>> 29; - out[currentPos++] = (val << 17) >>> 29; - out[currentPos++] = (val << 20) >>> 29; - out[currentPos++] = (val << 23) >>> 29; - out[currentPos++] = (val << 26) >>> 29; - out[currentPos++] = (val << 29) >>> 29; - out[currentPos++] = (valn << 1) >>> 29; - // number : 4, bitwidth : 7 - out[currentPos++] = (valn << 4) >>> 25; - out[currentPos++] = (valn << 11) >>> 25; - out[currentPos++] = (valn << 18) >>> 25; - out[currentPos++] = (valn << 25) >>> 25; - } - - private void decode22(int val, int valn, int[] out, int currentPos) { - // number : 9, bitwidth : 3 - out[currentPos++] = (val << 8) >>> 29; - out[currentPos++] = (val << 11) >>> 29; - out[currentPos++] = (val << 14) >>> 29; - out[currentPos++] = (val << 17) >>> 29; - out[currentPos++] = (val << 20) >>> 29; - out[currentPos++] = (val << 23) >>> 29; - out[currentPos++] = (val << 26) >>> 29; - out[currentPos++] = (val << 29) >>> 29; - out[currentPos++] = (valn << 4) >>> 29; - // number : 5, bitwidth : 5 - out[currentPos++] = (valn << 7) >>> 27; - out[currentPos++] = (valn << 12) >>> 27; - out[currentPos++] = (valn << 17) >>> 27; - out[currentPos++] = (valn << 22) >>> 27; - out[currentPos++] = (valn << 27) >>> 27; - } - - private void decode21(int val, int valn, int[] out, int currentPos) { - // number : 9, bitwidth : 3 - out[currentPos++] = (val << 8) >>> 29; - out[currentPos++] = (val << 11) >>> 29; - out[currentPos++] = (val << 14) >>> 29; - out[currentPos++] = (val << 17) >>> 29; - out[currentPos++] = (val << 20) >>> 29; - out[currentPos++] = (val << 23) >>> 29; - out[currentPos++] = (val << 26) >>> 29; - out[currentPos++] = (val << 29) >>> 29; - out[currentPos++] = (valn << 1) >>> 29; - // number : 7, bitwidth : 4 - 
out[currentPos++] = (valn << 4) >>> 28; - out[currentPos++] = (valn << 8) >>> 28; - out[currentPos++] = (valn << 12) >>> 28; - out[currentPos++] = (valn << 16) >>> 28; - out[currentPos++] = (valn << 20) >>> 28; - out[currentPos++] = (valn << 24) >>> 28; - out[currentPos++] = (valn << 28) >>> 28; - } - - private void decode20(int val, int valn, int[] out, int currentPos) { - // number : 9, bitwidth : 3 - out[currentPos++] = (val << 8) >>> 29; - out[currentPos++] = (val << 11) >>> 29; - out[currentPos++] = (val << 14) >>> 29; - out[currentPos++] = (val << 17) >>> 29; - out[currentPos++] = (val << 20) >>> 29; - out[currentPos++] = (val << 23) >>> 29; - out[currentPos++] = (val << 26) >>> 29; - out[currentPos++] = (val << 29) >>> 29; - out[currentPos++] = (valn << 2) >>> 29; - // number : 9, bitwidth : 3 - out[currentPos++] = (valn << 5) >>> 29; - out[currentPos++] = (valn << 8) >>> 29; - out[currentPos++] = (valn << 11) >>> 29; - out[currentPos++] = (valn << 14) >>> 29; - out[currentPos++] = (valn << 17) >>> 29; - out[currentPos++] = (valn << 20) >>> 29; - out[currentPos++] = (valn << 23) >>> 29; - out[currentPos++] = (valn << 26) >>> 29; - out[currentPos++] = (valn << 29) >>> 29; - } - - private void decode19(int val, int valn, int[] out, int currentPos) { - // number : 9, bitwidth : 3 - out[currentPos++] = (val << 8) >>> 29; - out[currentPos++] = (val << 11) >>> 29; - out[currentPos++] = (val << 14) >>> 29; - out[currentPos++] = (val << 17) >>> 29; - out[currentPos++] = (val << 20) >>> 29; - out[currentPos++] = (val << 23) >>> 29; - out[currentPos++] = (val << 26) >>> 29; - out[currentPos++] = (val << 29) >>> 29; - out[currentPos++] = (valn << 1) >>> 29; - // number : 14, bitwidth : 2 - out[currentPos++] = (valn << 4) >>> 30; - out[currentPos++] = (valn << 6) >>> 30; - out[currentPos++] = (valn << 8) >>> 30; - out[currentPos++] = (valn << 10) >>> 30; - out[currentPos++] = (valn << 12) >>> 30; - out[currentPos++] = (valn << 14) >>> 30; - out[currentPos++] = (valn << 
16) >>> 30; - out[currentPos++] = (valn << 18) >>> 30; - out[currentPos++] = (valn << 20) >>> 30; - out[currentPos++] = (valn << 22) >>> 30; // 10 - out[currentPos++] = (valn << 24) >>> 30; - out[currentPos++] = (valn << 26) >>> 30; - out[currentPos++] = (valn << 28) >>> 30; - out[currentPos++] = (valn << 30) >>> 30; - } - - private void decode18(int val, int valn, int[] out, int currentPos) { - // number : 9, bitwidth : 3 - out[currentPos++] = (val << 8) >>> 29; - out[currentPos++] = (val << 11) >>> 29; - out[currentPos++] = (val << 14) >>> 29; - out[currentPos++] = (val << 17) >>> 29; - out[currentPos++] = (val << 20) >>> 29; - out[currentPos++] = (val << 23) >>> 29; - out[currentPos++] = (val << 26) >>> 29; - out[currentPos++] = (val << 29) >>> 29; - out[currentPos++] = (valn << 1) >>> 29; - // number : 28, bitwidth : 1 - out[currentPos++] = (valn << 4) >>> 31; - out[currentPos++] = (valn << 5) >>> 31; - out[currentPos++] = (valn << 6) >>> 31; - out[currentPos++] = (valn << 7) >>> 31; - out[currentPos++] = (valn << 8) >>> 31; - out[currentPos++] = (valn << 9) >>> 31; - out[currentPos++] = (valn << 10) >>> 31; - out[currentPos++] = (valn << 11) >>> 31; - out[currentPos++] = (valn << 12) >>> 31; - out[currentPos++] = (valn << 13) >>> 31; // 10 - out[currentPos++] = (valn << 14) >>> 31; - out[currentPos++] = (valn << 15) >>> 31; - out[currentPos++] = (valn << 16) >>> 31; - out[currentPos++] = (valn << 17) >>> 31; - out[currentPos++] = (valn << 18) >>> 31; - out[currentPos++] = (valn << 19) >>> 31; - out[currentPos++] = (valn << 20) >>> 31; - out[currentPos++] = (valn << 21) >>> 31; - out[currentPos++] = (valn << 22) >>> 31; - out[currentPos++] = (valn << 23) >>> 31; // 20 - out[currentPos++] = (valn << 24) >>> 31; - out[currentPos++] = (valn << 25) >>> 31; - out[currentPos++] = (valn << 26) >>> 31; - out[currentPos++] = (valn << 27) >>> 31; - out[currentPos++] = (valn << 28) >>> 31; - out[currentPos++] = (valn << 29) >>> 31; - out[currentPos++] = (valn << 30) >>> 
31; - out[currentPos++] = (valn << 31) >>> 31; - } - - private void decode17(int val, int valn, int[] out, int currentPos) { - // number : 14, bitwidth : 2 - out[currentPos++] = (val << 8) >>> 30; - out[currentPos++] = (val << 10) >>> 30; - out[currentPos++] = (val << 12) >>> 30; - out[currentPos++] = (val << 14) >>> 30; - out[currentPos++] = (val << 16) >>> 30; - out[currentPos++] = (val << 18) >>> 30; - out[currentPos++] = (val << 20) >>> 30; - out[currentPos++] = (val << 22) >>> 30; // 10 - out[currentPos++] = (val << 24) >>> 30; - out[currentPos++] = (val << 26) >>> 30; - out[currentPos++] = (val << 28) >>> 30; - out[currentPos++] = (val << 30) >>> 30; - out[currentPos++] = (valn << 0) >>> 30; - out[currentPos++] = (valn << 2) >>> 30; - // number : 1, bitwidth : 28 - out[currentPos++] = (valn << 4) >>> 4; - } - - private void decode16(int val, int valn, int[] out, int currentPos) { - // number : 14, bitwidth : 2 - out[currentPos++] = (val << 8) >>> 30; - out[currentPos++] = (val << 10) >>> 30; - out[currentPos++] = (val << 12) >>> 30; - out[currentPos++] = (val << 14) >>> 30; - out[currentPos++] = (val << 16) >>> 30; - out[currentPos++] = (val << 18) >>> 30; - out[currentPos++] = (val << 20) >>> 30; - out[currentPos++] = (val << 22) >>> 30; // 10 - out[currentPos++] = (val << 24) >>> 30; - out[currentPos++] = (val << 26) >>> 30; - out[currentPos++] = (val << 28) >>> 30; - out[currentPos++] = (val << 30) >>> 30; - out[currentPos++] = (valn << 0) >>> 30; - out[currentPos++] = (valn << 2) >>> 30; - // number : 2, bitwidth : 14 - out[currentPos++] = (valn << 4) >>> 18; - out[currentPos++] = (valn << 18) >>> 18; - } - - private void decode15(int val, int valn, int[] out, int currentPos) { - // number : 14, bitwidth : 2 - out[currentPos++] = (val << 8) >>> 30; - out[currentPos++] = (val << 10) >>> 30; - out[currentPos++] = (val << 12) >>> 30; - out[currentPos++] = (val << 14) >>> 30; - out[currentPos++] = (val << 16) >>> 30; - out[currentPos++] = (val << 18) >>> 30; 
- out[currentPos++] = (val << 20) >>> 30; - out[currentPos++] = (val << 22) >>> 30; // 10 - out[currentPos++] = (val << 24) >>> 30; - out[currentPos++] = (val << 26) >>> 30; - out[currentPos++] = (val << 28) >>> 30; - out[currentPos++] = (val << 30) >>> 30; - out[currentPos++] = (valn << 1) >>> 30; - out[currentPos++] = (valn << 3) >>> 30; - // number : 3, bitwidth : 9 - out[currentPos++] = (valn << 5) >>> 23; - out[currentPos++] = (valn << 14) >>> 23; - out[currentPos++] = (valn << 23) >>> 23; - } - - private void decode14(int val, int valn, int[] out, int currentPos) { - // number : 14, bitwidth : 2 - out[currentPos++] = (val << 8) >>> 30; - out[currentPos++] = (val << 10) >>> 30; - out[currentPos++] = (val << 12) >>> 30; - out[currentPos++] = (val << 14) >>> 30; - out[currentPos++] = (val << 16) >>> 30; - out[currentPos++] = (val << 18) >>> 30; - out[currentPos++] = (val << 20) >>> 30; - out[currentPos++] = (val << 22) >>> 30; // 10 - out[currentPos++] = (val << 24) >>> 30; - out[currentPos++] = (val << 26) >>> 30; - out[currentPos++] = (val << 28) >>> 30; - out[currentPos++] = (val << 30) >>> 30; - out[currentPos++] = (valn << 0) >>> 30; - out[currentPos++] = (valn << 2) >>> 30; - // number : 4, bitwidth : 7 - out[currentPos++] = (valn << 4) >>> 25; - out[currentPos++] = (valn << 11) >>> 25; - out[currentPos++] = (valn << 18) >>> 25; - out[currentPos++] = (valn << 25) >>> 25; - } - - private void decode13(int val, int valn, int[] out, int currentPos) { - // number : 14, bitwidth : 2 - out[currentPos++] = (val << 8) >>> 30; - out[currentPos++] = (val << 10) >>> 30; - out[currentPos++] = (val << 12) >>> 30; - out[currentPos++] = (val << 14) >>> 30; - out[currentPos++] = (val << 16) >>> 30; - out[currentPos++] = (val << 18) >>> 30; - out[currentPos++] = (val << 20) >>> 30; - out[currentPos++] = (val << 22) >>> 30; // 10 - out[currentPos++] = (val << 24) >>> 30; - out[currentPos++] = (val << 26) >>> 30; - out[currentPos++] = (val << 28) >>> 30; - out[currentPos++] 
= (val << 30) >>> 30; - out[currentPos++] = (valn << 3) >>> 30; - out[currentPos++] = (valn << 5) >>> 30; - // number : 5, bitwidth : 5 - out[currentPos++] = (valn << 7) >>> 27; - out[currentPos++] = (valn << 12) >>> 27; - out[currentPos++] = (valn << 17) >>> 27; - out[currentPos++] = (valn << 22) >>> 27; - out[currentPos++] = (valn << 27) >>> 27; - - } - - private void decode12(int val, int valn, int[] out, int currentPos) { - // number : 14, bitwidth : 2 - out[currentPos++] = (val << 8) >>> 30; - out[currentPos++] = (val << 10) >>> 30; - out[currentPos++] = (val << 12) >>> 30; - out[currentPos++] = (val << 14) >>> 30; - out[currentPos++] = (val << 16) >>> 30; - out[currentPos++] = (val << 18) >>> 30; - out[currentPos++] = (val << 20) >>> 30; - out[currentPos++] = (val << 22) >>> 30; // 10 - out[currentPos++] = (val << 24) >>> 30; - out[currentPos++] = (val << 26) >>> 30; - out[currentPos++] = (val << 28) >>> 30; - out[currentPos++] = (val << 30) >>> 30; - out[currentPos++] = (valn << 0) >>> 30; - out[currentPos++] = (valn << 2) >>> 30; - // number : 7, bitwidth : 4 - out[currentPos++] = (valn << 4) >>> 28; - out[currentPos++] = (valn << 8) >>> 28; - out[currentPos++] = (valn << 12) >>> 28; - out[currentPos++] = (valn << 16) >>> 28; - out[currentPos++] = (valn << 20) >>> 28; - out[currentPos++] = (valn << 24) >>> 28; - out[currentPos++] = (valn << 28) >>> 28; - - } - - private void decode11(int val, int valn, int[] out, int currentPos) { - // number : 14, bitwidth : 2 - out[currentPos++] = (val << 8) >>> 30; - out[currentPos++] = (val << 10) >>> 30; - out[currentPos++] = (val << 12) >>> 30; - out[currentPos++] = (val << 14) >>> 30; - out[currentPos++] = (val << 16) >>> 30; - out[currentPos++] = (val << 18) >>> 30; - out[currentPos++] = (val << 20) >>> 30; - out[currentPos++] = (val << 22) >>> 30; // 10 - out[currentPos++] = (val << 24) >>> 30; - out[currentPos++] = (val << 26) >>> 30; - out[currentPos++] = (val << 28) >>> 30; - out[currentPos++] = (val << 30) >>> 
30; - out[currentPos++] = (valn << 1) >>> 30; - out[currentPos++] = (valn << 3) >>> 30; - // number : 9, bitwidth : 3 - out[currentPos++] = (valn << 5) >>> 29; - out[currentPos++] = (valn << 8) >>> 29; - out[currentPos++] = (valn << 11) >>> 29; - out[currentPos++] = (valn << 14) >>> 29; - out[currentPos++] = (valn << 17) >>> 29; - out[currentPos++] = (valn << 20) >>> 29; - out[currentPos++] = (valn << 23) >>> 29; - out[currentPos++] = (valn << 26) >>> 29; - out[currentPos++] = (valn << 29) >>> 29; - - } - - private void decode10(int val, int valn, int[] out, int currentPos) { - // number : 14, bitwidth : 2 - out[currentPos++] = (val << 8) >>> 30; - out[currentPos++] = (val << 10) >>> 30; - out[currentPos++] = (val << 12) >>> 30; - out[currentPos++] = (val << 14) >>> 30; - out[currentPos++] = (val << 16) >>> 30; - out[currentPos++] = (val << 18) >>> 30; - out[currentPos++] = (val << 20) >>> 30; - out[currentPos++] = (val << 22) >>> 30; // 10 - out[currentPos++] = (val << 24) >>> 30; - out[currentPos++] = (val << 26) >>> 30; - out[currentPos++] = (val << 28) >>> 30; - out[currentPos++] = (val << 30) >>> 30; - out[currentPos++] = (valn << 0) >>> 30; - out[currentPos++] = (valn << 2) >>> 30; - // number : 14, bitwidth : 2 - out[currentPos++] = (valn << 4) >>> 30; - out[currentPos++] = (valn << 6) >>> 30; - out[currentPos++] = (valn << 8) >>> 30; - out[currentPos++] = (valn << 10) >>> 30; - out[currentPos++] = (valn << 12) >>> 30; - out[currentPos++] = (valn << 14) >>> 30; - out[currentPos++] = (valn << 16) >>> 30; - out[currentPos++] = (valn << 18) >>> 30; - out[currentPos++] = (valn << 20) >>> 30; - out[currentPos++] = (valn << 22) >>> 30; // 10 - out[currentPos++] = (valn << 24) >>> 30; - out[currentPos++] = (valn << 26) >>> 30; - out[currentPos++] = (valn << 28) >>> 30; - out[currentPos++] = (valn << 30) >>> 30; - } - - private void decode9(int val, int valn, int[] out, int currentPos) { - // number : 14, bitwidth : 2 - out[currentPos++] = (val << 8) >>> 30; - 
out[currentPos++] = (val << 10) >>> 30; - out[currentPos++] = (val << 12) >>> 30; - out[currentPos++] = (val << 14) >>> 30; - out[currentPos++] = (val << 16) >>> 30; - out[currentPos++] = (val << 18) >>> 30; - out[currentPos++] = (val << 20) >>> 30; - out[currentPos++] = (val << 22) >>> 30; // 10 - out[currentPos++] = (val << 24) >>> 30; - out[currentPos++] = (val << 26) >>> 30; - out[currentPos++] = (val << 28) >>> 30; - out[currentPos++] = (val << 30) >>> 30; - out[currentPos++] = (valn << 0) >>> 30; - out[currentPos++] = (valn << 2) >>> 30; - // number : 28, bitwidth : 1 - out[currentPos++] = (valn << 4) >>> 31; - out[currentPos++] = (valn << 5) >>> 31; - out[currentPos++] = (valn << 6) >>> 31; - out[currentPos++] = (valn << 7) >>> 31; - out[currentPos++] = (valn << 8) >>> 31; - out[currentPos++] = (valn << 9) >>> 31; - out[currentPos++] = (valn << 10) >>> 31; - out[currentPos++] = (valn << 11) >>> 31; - out[currentPos++] = (valn << 12) >>> 31; - out[currentPos++] = (valn << 13) >>> 31; // 10 - out[currentPos++] = (valn << 14) >>> 31; - out[currentPos++] = (valn << 15) >>> 31; - out[currentPos++] = (valn << 16) >>> 31; - out[currentPos++] = (valn << 17) >>> 31; - out[currentPos++] = (valn << 18) >>> 31; - out[currentPos++] = (valn << 19) >>> 31; - out[currentPos++] = (valn << 20) >>> 31; - out[currentPos++] = (valn << 21) >>> 31; - out[currentPos++] = (valn << 22) >>> 31; - out[currentPos++] = (valn << 23) >>> 31; // 20 - out[currentPos++] = (valn << 24) >>> 31; - out[currentPos++] = (valn << 25) >>> 31; - out[currentPos++] = (valn << 26) >>> 31; - out[currentPos++] = (valn << 27) >>> 31; - out[currentPos++] = (valn << 28) >>> 31; - out[currentPos++] = (valn << 29) >>> 31; - out[currentPos++] = (valn << 30) >>> 31; - out[currentPos++] = (valn << 31) >>> 31; - } - - private void decode8(int val, int valn, int[] out, int currentPos) { - // number : 28, bitwidth : 1 - out[currentPos++] = (val << 8) >>> 31; - out[currentPos++] = (val << 9) >>> 31; - 
out[currentPos++] = (val << 10) >>> 31; - out[currentPos++] = (val << 11) >>> 31; - out[currentPos++] = (val << 12) >>> 31; - out[currentPos++] = (val << 13) >>> 31; // 10 - out[currentPos++] = (val << 14) >>> 31; - out[currentPos++] = (val << 15) >>> 31; - out[currentPos++] = (val << 16) >>> 31; - out[currentPos++] = (val << 17) >>> 31; - out[currentPos++] = (val << 18) >>> 31; - out[currentPos++] = (val << 19) >>> 31; - out[currentPos++] = (val << 20) >>> 31; - out[currentPos++] = (val << 21) >>> 31; - out[currentPos++] = (val << 22) >>> 31; - out[currentPos++] = (val << 23) >>> 31; // 20 - out[currentPos++] = (val << 24) >>> 31; - out[currentPos++] = (val << 25) >>> 31; - out[currentPos++] = (val << 26) >>> 31; - out[currentPos++] = (val << 27) >>> 31; - out[currentPos++] = (val << 28) >>> 31; - out[currentPos++] = (val << 29) >>> 31; - out[currentPos++] = (val << 30) >>> 31; - out[currentPos++] = (val << 31) >>> 31; - out[currentPos++] = valn >>> 31; - out[currentPos++] = (valn << 1) >>> 31; - out[currentPos++] = (valn << 2) >>> 31; - out[currentPos++] = (valn << 3) >>> 31; - // number : 1, bitwidth : 28 - out[currentPos++] = (valn << 4) >>> 4; - } - - private void decode7(int val, int valn, int[] out, int currentPos) { - // number : 28, bitwidth : 1 - out[currentPos++] = (val << 8) >>> 31; - out[currentPos++] = (val << 9) >>> 31; - out[currentPos++] = (val << 10) >>> 31; - out[currentPos++] = (val << 11) >>> 31; - out[currentPos++] = (val << 12) >>> 31; - out[currentPos++] = (val << 13) >>> 31; // 10 - out[currentPos++] = (val << 14) >>> 31; - out[currentPos++] = (val << 15) >>> 31; - out[currentPos++] = (val << 16) >>> 31; - out[currentPos++] = (val << 17) >>> 31; - out[currentPos++] = (val << 18) >>> 31; - out[currentPos++] = (val << 19) >>> 31; - out[currentPos++] = (val << 20) >>> 31; - out[currentPos++] = (val << 21) >>> 31; - out[currentPos++] = (val << 22) >>> 31; - out[currentPos++] = (val << 23) >>> 31; // 20 - out[currentPos++] = (val << 24) >>> 31; 
- out[currentPos++] = (val << 25) >>> 31; - out[currentPos++] = (val << 26) >>> 31; - out[currentPos++] = (val << 27) >>> 31; - out[currentPos++] = (val << 28) >>> 31; - out[currentPos++] = (val << 29) >>> 31; - out[currentPos++] = (val << 30) >>> 31; - out[currentPos++] = (val << 31) >>> 31; - out[currentPos++] = valn >>> 31; - out[currentPos++] = (valn << 1) >>> 31; - out[currentPos++] = (valn << 2) >>> 31; - out[currentPos++] = (valn << 3) >>> 31; - // number : 2, bitwidth : 14 - out[currentPos++] = (valn << 4) >>> 18; - out[currentPos++] = (valn << 18) >>> 18; - } - - private void decode6(int val, int valn, int[] out, int currentPos) { - // number : 28, bitwidth : 1 - out[currentPos++] = (val << 8) >>> 31; - out[currentPos++] = (val << 9) >>> 31; - out[currentPos++] = (val << 10) >>> 31; - out[currentPos++] = (val << 11) >>> 31; - out[currentPos++] = (val << 12) >>> 31; - out[currentPos++] = (val << 13) >>> 31; // 10 - out[currentPos++] = (val << 14) >>> 31; - out[currentPos++] = (val << 15) >>> 31; - out[currentPos++] = (val << 16) >>> 31; - out[currentPos++] = (val << 17) >>> 31; - out[currentPos++] = (val << 18) >>> 31; - out[currentPos++] = (val << 19) >>> 31; - out[currentPos++] = (val << 20) >>> 31; - out[currentPos++] = (val << 21) >>> 31; - out[currentPos++] = (val << 22) >>> 31; - out[currentPos++] = (val << 23) >>> 31; // 20 - out[currentPos++] = (val << 24) >>> 31; - out[currentPos++] = (val << 25) >>> 31; - out[currentPos++] = (val << 26) >>> 31; - out[currentPos++] = (val << 27) >>> 31; - out[currentPos++] = (val << 28) >>> 31; - out[currentPos++] = (val << 29) >>> 31; - out[currentPos++] = (val << 30) >>> 31; - out[currentPos++] = (val << 31) >>> 31; - out[currentPos++] = (valn << 1) >>> 31; - out[currentPos++] = (valn << 2) >>> 31; - out[currentPos++] = (valn << 3) >>> 31; - out[currentPos++] = (valn << 4) >>> 31; - // number : 3, bitwidth : 9 - out[currentPos++] = (valn << 5) >>> 23; - out[currentPos++] = (valn << 14) >>> 23; - out[currentPos++] 
= (valn << 23) >>> 23; - } - - private void decode5(int val, int valn, int[] out, int currentPos) { - // number : 28, bitwidth : 1 - out[currentPos++] = (val << 8) >>> 31; - out[currentPos++] = (val << 9) >>> 31; - out[currentPos++] = (val << 10) >>> 31; - out[currentPos++] = (val << 11) >>> 31; - out[currentPos++] = (val << 12) >>> 31; - out[currentPos++] = (val << 13) >>> 31; // 10 - out[currentPos++] = (val << 14) >>> 31; - out[currentPos++] = (val << 15) >>> 31; - out[currentPos++] = (val << 16) >>> 31; - out[currentPos++] = (val << 17) >>> 31; - out[currentPos++] = (val << 18) >>> 31; - out[currentPos++] = (val << 19) >>> 31; - out[currentPos++] = (val << 20) >>> 31; - out[currentPos++] = (val << 21) >>> 31; - out[currentPos++] = (val << 22) >>> 31; - out[currentPos++] = (val << 23) >>> 31; // 20 - out[currentPos++] = (val << 24) >>> 31; - out[currentPos++] = (val << 25) >>> 31; - out[currentPos++] = (val << 26) >>> 31; - out[currentPos++] = (val << 27) >>> 31; - out[currentPos++] = (val << 28) >>> 31; - out[currentPos++] = (val << 29) >>> 31; - out[currentPos++] = (val << 30) >>> 31; - out[currentPos++] = (val << 31) >>> 31; - out[currentPos++] = valn >>> 31; - out[currentPos++] = (valn << 1) >>> 31; - out[currentPos++] = (valn << 2) >>> 31; - out[currentPos++] = (valn << 3) >>> 31; - // number : 4, bitwidth : 7 - out[currentPos++] = (valn << 4) >>> 25; - out[currentPos++] = (valn << 11) >>> 25; - out[currentPos++] = (valn << 18) >>> 25; - out[currentPos++] = (valn << 25) >>> 25; - } - - private void decode4(int val, int valn, int[] out, int currentPos) { - // number : 28, bitwidth : 1 - out[currentPos++] = (val << 8) >>> 31; - out[currentPos++] = (val << 9) >>> 31; - out[currentPos++] = (val << 10) >>> 31; - out[currentPos++] = (val << 11) >>> 31; - out[currentPos++] = (val << 12) >>> 31; - out[currentPos++] = (val << 13) >>> 31; // 10 - out[currentPos++] = (val << 14) >>> 31; - out[currentPos++] = (val << 15) >>> 31; - out[currentPos++] = (val << 16) >>> 
31; - out[currentPos++] = (val << 17) >>> 31; - out[currentPos++] = (val << 18) >>> 31; - out[currentPos++] = (val << 19) >>> 31; - out[currentPos++] = (val << 20) >>> 31; - out[currentPos++] = (val << 21) >>> 31; - out[currentPos++] = (val << 22) >>> 31; - out[currentPos++] = (val << 23) >>> 31; // 20 - out[currentPos++] = (val << 24) >>> 31; - out[currentPos++] = (val << 25) >>> 31; - out[currentPos++] = (val << 26) >>> 31; - out[currentPos++] = (val << 27) >>> 31; - out[currentPos++] = (val << 28) >>> 31; - out[currentPos++] = (val << 29) >>> 31; - out[currentPos++] = (val << 30) >>> 31; - out[currentPos++] = (val << 31) >>> 31; - out[currentPos++] = (valn << 3) >>> 31;// 头部3bit - out[currentPos++] = (valn << 4) >>> 31; - out[currentPos++] = (valn << 5) >>> 31; - out[currentPos++] = (valn << 6) >>> 31; - // number : 5, bitwidth : 5 - out[currentPos++] = (valn << 7) >>> 27; - out[currentPos++] = (valn << 12) >>> 27; - out[currentPos++] = (valn << 17) >>> 27; - out[currentPos++] = (valn << 22) >>> 27; - out[currentPos++] = (valn << 27) >>> 27; - } - - private void decode3(int val, int valn, int[] out, int currentPos) { - // number : 28, bitwidth : 1 - out[currentPos++] = (val << 8) >>> 31; - out[currentPos++] = (val << 9) >>> 31; - out[currentPos++] = (val << 10) >>> 31; - out[currentPos++] = (val << 11) >>> 31; - out[currentPos++] = (val << 12) >>> 31; - out[currentPos++] = (val << 13) >>> 31; // 10 - out[currentPos++] = (val << 14) >>> 31; - out[currentPos++] = (val << 15) >>> 31; - out[currentPos++] = (val << 16) >>> 31; - out[currentPos++] = (val << 17) >>> 31; - out[currentPos++] = (val << 18) >>> 31; - out[currentPos++] = (val << 19) >>> 31; - out[currentPos++] = (val << 20) >>> 31; - out[currentPos++] = (val << 21) >>> 31; - out[currentPos++] = (val << 22) >>> 31; - out[currentPos++] = (val << 23) >>> 31; // 20 - out[currentPos++] = (val << 24) >>> 31; - out[currentPos++] = (val << 25) >>> 31; - out[currentPos++] = (val << 26) >>> 31; - out[currentPos++] = 
(val << 27) >>> 31; - out[currentPos++] = (val << 28) >>> 31; - out[currentPos++] = (val << 29) >>> 31; - out[currentPos++] = (val << 30) >>> 31; - out[currentPos++] = (val << 31) >>> 31; - out[currentPos++] = valn >>> 31; - out[currentPos++] = (valn << 1) >>> 31; - out[currentPos++] = (valn << 2) >>> 31; - out[currentPos++] = (valn << 3) >>> 31; - // number : 7, bitwidth : 4 - out[currentPos++] = (valn << 4) >>> 28; - out[currentPos++] = (valn << 8) >>> 28; - out[currentPos++] = (valn << 12) >>> 28; - out[currentPos++] = (valn << 16) >>> 28; - out[currentPos++] = (valn << 20) >>> 28; - out[currentPos++] = (valn << 24) >>> 28; - out[currentPos++] = (valn << 28) >>> 28; - } - - private void decode2(int val, int valn, int[] out, int currentPos) { - // number : 28, bitwidth : 1 - out[currentPos++] = (val << 8) >>> 31; - out[currentPos++] = (val << 9) >>> 31; - out[currentPos++] = (val << 10) >>> 31; - out[currentPos++] = (val << 11) >>> 31; - out[currentPos++] = (val << 12) >>> 31; - out[currentPos++] = (val << 13) >>> 31; // 10 - out[currentPos++] = (val << 14) >>> 31; - out[currentPos++] = (val << 15) >>> 31; - out[currentPos++] = (val << 16) >>> 31; - out[currentPos++] = (val << 17) >>> 31; - out[currentPos++] = (val << 18) >>> 31; - out[currentPos++] = (val << 19) >>> 31; - out[currentPos++] = (val << 20) >>> 31; - out[currentPos++] = (val << 21) >>> 31; - out[currentPos++] = (val << 22) >>> 31; - out[currentPos++] = (val << 23) >>> 31; // 20 - out[currentPos++] = (val << 24) >>> 31; - out[currentPos++] = (val << 25) >>> 31; - out[currentPos++] = (val << 26) >>> 31; - out[currentPos++] = (val << 27) >>> 31; - out[currentPos++] = (val << 28) >>> 31; - out[currentPos++] = (val << 29) >>> 31; - out[currentPos++] = (val << 30) >>> 31; - out[currentPos++] = (val << 31) >>> 31; - out[currentPos++] = (valn << 1) >>> 31;// 头部1bit - out[currentPos++] = (valn << 2) >>> 31; - out[currentPos++] = (valn << 3) >>> 31; - out[currentPos++] = (valn << 4) >>> 31; - // number : 9, 
bitwidth : 3 - out[currentPos++] = (valn << 5) >>> 29; - out[currentPos++] = (valn << 8) >>> 29; - out[currentPos++] = (valn << 11) >>> 29; - out[currentPos++] = (valn << 14) >>> 29; - out[currentPos++] = (valn << 17) >>> 29; - out[currentPos++] = (valn << 20) >>> 29; - out[currentPos++] = (valn << 23) >>> 29; - out[currentPos++] = (valn << 26) >>> 29; - out[currentPos++] = (valn << 29) >>> 29; - } - - private void decode1(int val, int valn, int[] out, int currentPos) { - // number : 28, bitwidth : 1 - out[currentPos++] = (val << 8) >>> 31; - out[currentPos++] = (val << 9) >>> 31; - out[currentPos++] = (val << 10) >>> 31; - out[currentPos++] = (val << 11) >>> 31; - out[currentPos++] = (val << 12) >>> 31; - out[currentPos++] = (val << 13) >>> 31; // 10 - out[currentPos++] = (val << 14) >>> 31; - out[currentPos++] = (val << 15) >>> 31; - out[currentPos++] = (val << 16) >>> 31; - out[currentPos++] = (val << 17) >>> 31; - out[currentPos++] = (val << 18) >>> 31; - out[currentPos++] = (val << 19) >>> 31; - out[currentPos++] = (val << 20) >>> 31; - out[currentPos++] = (val << 21) >>> 31; - out[currentPos++] = (val << 22) >>> 31; - out[currentPos++] = (val << 23) >>> 31;// 20 - out[currentPos++] = (val << 24) >>> 31; - out[currentPos++] = (val << 25) >>> 31; - out[currentPos++] = (val << 26) >>> 31; - out[currentPos++] = (val << 27) >>> 31; - out[currentPos++] = (val << 28) >>> 31; - out[currentPos++] = (val << 29) >>> 31; - out[currentPos++] = (val << 30) >>> 31; - out[currentPos++] = (val << 31) >>> 31; - out[currentPos++] = valn >>> 31; - out[currentPos++] = (valn << 1) >>> 31; - out[currentPos++] = (valn << 2) >>> 31; - out[currentPos++] = (valn << 3) >>> 31; - // number : 14, bitwidth : 2 - out[currentPos++] = (valn << 4) >>> 30; - out[currentPos++] = (valn << 6) >>> 30; - out[currentPos++] = (valn << 8) >>> 30; - out[currentPos++] = (valn << 10) >>> 30; - out[currentPos++] = (valn << 12) >>> 30; - out[currentPos++] = (valn << 14) >>> 30; - out[currentPos++] = (valn 
<< 16) >>> 30; - out[currentPos++] = (valn << 18) >>> 30; - out[currentPos++] = (valn << 20) >>> 30; - out[currentPos++] = (valn << 22) >>> 30; // 10 - out[currentPos++] = (valn << 24) >>> 30; - out[currentPos++] = (valn << 26) >>> 30; - out[currentPos++] = (valn << 28) >>> 30; - out[currentPos++] = (valn << 30) >>> 30; - } - - private void decode0(int val, int valn, int[] out, int currentPos) { - // number : 28, bitwidth : 1 - out[currentPos++] = (val << 8) >>> 31; - out[currentPos++] = (val << 9) >>> 31; - out[currentPos++] = (val << 10) >>> 31; - out[currentPos++] = (val << 11) >>> 31; - out[currentPos++] = (val << 12) >>> 31; - out[currentPos++] = (val << 13) >>> 31; // 10 - out[currentPos++] = (val << 14) >>> 31; - out[currentPos++] = (val << 15) >>> 31; - out[currentPos++] = (val << 16) >>> 31; - out[currentPos++] = (val << 17) >>> 31; - out[currentPos++] = (val << 18) >>> 31; - out[currentPos++] = (val << 19) >>> 31; - out[currentPos++] = (val << 20) >>> 31; - out[currentPos++] = (val << 21) >>> 31; - out[currentPos++] = (val << 22) >>> 31; - out[currentPos++] = (val << 23) >>> 31; // 20 - out[currentPos++] = (val << 24) >>> 31; - out[currentPos++] = (val << 25) >>> 31; - out[currentPos++] = (val << 26) >>> 31; - out[currentPos++] = (val << 27) >>> 31; - out[currentPos++] = (val << 28) >>> 31; - out[currentPos++] = (val << 29) >>> 31; - out[currentPos++] = (val << 30) >>> 31; - out[currentPos++] = (val << 31) >>> 31; - out[currentPos++] = valn >>> 31; - out[currentPos++] = (valn << 1) >>> 31; - out[currentPos++] = (valn << 2) >>> 31; - out[currentPos++] = (valn << 3) >>> 31; - // number : 28, bitwidth : 1 - out[currentPos++] = (valn << 4) >>> 31; - out[currentPos++] = (valn << 5) >>> 31; - out[currentPos++] = (valn << 6) >>> 31; - out[currentPos++] = (valn << 7) >>> 31; - out[currentPos++] = (valn << 8) >>> 31; - out[currentPos++] = (valn << 9) >>> 31; - out[currentPos++] = (valn << 10) >>> 31; - out[currentPos++] = (valn << 11) >>> 31; - out[currentPos++] = 
(valn << 12) >>> 31; - out[currentPos++] = (valn << 13) >>> 31; // 10 - out[currentPos++] = (valn << 14) >>> 31; - out[currentPos++] = (valn << 15) >>> 31; - out[currentPos++] = (valn << 16) >>> 31; - out[currentPos++] = (valn << 17) >>> 31; - out[currentPos++] = (valn << 18) >>> 31; - out[currentPos++] = (valn << 19) >>> 31; - out[currentPos++] = (valn << 20) >>> 31; - out[currentPos++] = (valn << 21) >>> 31; - out[currentPos++] = (valn << 22) >>> 31; - out[currentPos++] = (valn << 23) >>> 31; // 20 - out[currentPos++] = (valn << 24) >>> 31; - out[currentPos++] = (valn << 25) >>> 31; - out[currentPos++] = (valn << 26) >>> 31; - out[currentPos++] = (valn << 27) >>> 31; - out[currentPos++] = (valn << 28) >>> 31; - out[currentPos++] = (valn << 29) >>> 31; - out[currentPos++] = (valn << 30) >>> 31; - out[currentPos++] = (valn << 31) >>> 31; - } - - - private final static int bitLength[] = { 1, 2, 3, 4, 5, 7, 9, 14, 28 }; - - private final static int codeNum[] = { 28, 14, 9, 7, 5, 4, 3, 2, 1 }; - - @Override - public String toString() { - return this.getClass().getSimpleName(); - } - - @Override - public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { - int tmpoutpos = outpos.get(); - int currentPos = inpos.get(); - int selector1 = 0; - int selector2 = 0; - final int finalin = currentPos + inlength; - while (currentPos < finalin - 28 * 2) { - int nextCurrentPos = currentPos; - mainloop1: for (selector1=0; selector1 <= 8; selector1++) { - int compressedNum = codeNum[selector1]; - //if (finalin <= nextCurrentPos + compressedNum - 1) - // compressedNum = finalin - nextCurrentPos; - int b = bitLength[selector1]; - int max = 1 << b; - int i = 0; - for (; i < compressedNum; i++) { - if (Util.smallerorequalthan(max, in[nextCurrentPos + i])) - continue mainloop1; - } - nextCurrentPos += compressedNum; - break; - } - mainloop2: for (selector2 = 0; selector2 <= 8; selector2++) { - int compressedNum = codeNum[selector2]; - //if 
(finalin <= nextCurrentPos + compressedNum - 1) - // compressedNum = finalin - nextCurrentPos; - int b = bitLength[selector2]; - int max = 1 << b; - int i = 0; - for (; i < compressedNum; i++) { - if (Util.smallerorequalthan(max, in[nextCurrentPos + i])) - continue mainloop2; - } - nextCurrentPos += compressedNum; - break; - } - int code = M[selector1][selector2]; - out[tmpoutpos] = 0; - out[tmpoutpos + 1] = 0; - switch (code) { - case 0: - encode0(in, currentPos, code, out, tmpoutpos); - break; - case 1: - encode1(in, currentPos, code, out, tmpoutpos); - break; - case 2: - encode2(in, currentPos, code, out, tmpoutpos); - break; - case 3: - encode3(in, currentPos, code, out, tmpoutpos); - break; - case 4: - encode4(in, currentPos, code, out, tmpoutpos); - break; - case 5: - encode5(in, currentPos, code, out, tmpoutpos); - break; - case 6: - encode6(in, currentPos, code, out, tmpoutpos); - break; - case 7: - encode7(in, currentPos, code, out, tmpoutpos); - break; - case 8: - encode8(in, currentPos, code, out, tmpoutpos); - break; - case 9: - encode9(in, currentPos, code, out, tmpoutpos); - break; - case 10: - encode10(in, currentPos, code, out, tmpoutpos); - break; - case 11: - encode11(in, currentPos, code, out, tmpoutpos); - break; - case 12: - encode12(in, currentPos, code, out, tmpoutpos); - break; - case 13: - encode13(in, currentPos, code, out, tmpoutpos); - break; - case 14: - encode14(in, currentPos, code, out, tmpoutpos); - break; - case 15: - encode15(in, currentPos, code, out, tmpoutpos); - break; - case 16: - encode16(in, currentPos, code, out, tmpoutpos); - break; - case 17: - encode17(in, currentPos, code, out, tmpoutpos); - break; - case 18: - encode18(in, currentPos, code, out, tmpoutpos); - break; - case 19: - encode19(in, currentPos, code, out, tmpoutpos); - break; - case 20: - encode20(in, currentPos, code, out, tmpoutpos); - break; - case 21: - encode21(in, currentPos, code, out, tmpoutpos); - break; - case 22: - encode22(in, currentPos, code, 
out, tmpoutpos); - break; - case 23: - encode23(in, currentPos, code, out, tmpoutpos); - break; - case 24: - encode24(in, currentPos, code, out, tmpoutpos); - break; - case 25: - encode25(in, currentPos, code, out, tmpoutpos); - break; - case 26: - encode26(in, currentPos, code, out, tmpoutpos); - break; - case 27: - encode27(in, currentPos, code, out, tmpoutpos); - break; - case 28: - encode28(in, currentPos, code, out, tmpoutpos); - break; - case 29: - encode29(in, currentPos, code, out, tmpoutpos); - break; - case 30: - encode30(in, currentPos, code, out, tmpoutpos); - break; - case 31: - encode31(in, currentPos, code, out, tmpoutpos); - break; - case 32: - encode32(in, currentPos, code, out, tmpoutpos); - break; - case 33: - encode33(in, currentPos, code, out, tmpoutpos); - break; - case 34: - encode34(in, currentPos, code, out, tmpoutpos); - break; - case 35: - encode35(in, currentPos, code, out, tmpoutpos); - break; - case 36: - encode36(in, currentPos, code, out, tmpoutpos); - break; - case 37: - encode37(in, currentPos, code, out, tmpoutpos); - break; - case 38: - encode38(in, currentPos, code, out, tmpoutpos); - break; - case 39: - encode39(in, currentPos, code, out, tmpoutpos); - break; - case 40: - encode40(in, currentPos, code, out, tmpoutpos); - break; - case 41: - encode41(in, currentPos, code, out, tmpoutpos); - break; - case 42: - encode42(in, currentPos, code, out, tmpoutpos); - break; - case 43: - encode43(in, currentPos, code, out, tmpoutpos); - break; - case 44: - encode44(in, currentPos, code, out, tmpoutpos); - break; - case 45: - encode45(in, currentPos, code, out, tmpoutpos); - break; - case 46: - encode46(in, currentPos, code, out, tmpoutpos); - break; - case 47: - encode47(in, currentPos, code, out, tmpoutpos); - break; - case 48: - encode48(in, currentPos, code, out, tmpoutpos); - break; - case 49: - encode49(in, currentPos, code, out, tmpoutpos); - break; - case 50: - encode50(in, currentPos, code, out, tmpoutpos); - break; - case 51: - 
encode51(in, currentPos, code, out, tmpoutpos); - break; - case 52: - encode52(in, currentPos, code, out, tmpoutpos); - break; - case 53: - encode53(in, currentPos, code, out, tmpoutpos); - break; - case 54: - encode54(in, currentPos, code, out, tmpoutpos); - break; - case 55: - encode55(in, currentPos, code, out, tmpoutpos); - break; - case 56: - encode56(in, currentPos, code, out, tmpoutpos); - break; - case 57: - encode57(in, currentPos, code, out, tmpoutpos); - break; - case 58: - encode58(in, currentPos, code, out, tmpoutpos); - break; - case 59: - encode59(in, currentPos, code, out, tmpoutpos); - break; - case 60: - encode60(in, currentPos, code, out, tmpoutpos); - break; - case 61: - encode61(in, currentPos, code, out, tmpoutpos); - break; - case 62: - encode62(in, currentPos, code, out, tmpoutpos); - break; - case 63: - encode63(in, currentPos, code, out, tmpoutpos); - break; - case 64: - encode64(in, currentPos, code, out, tmpoutpos); - break; - case 65: - encode65(in, currentPos, code, out, tmpoutpos); - break; - case 66: - encode66(in, currentPos, code, out, tmpoutpos); - break; - case 67: - encode67(in, currentPos, code, out, tmpoutpos); - break; - case 68: - encode68(in, currentPos, code, out, tmpoutpos); - break; - case 69: - encode69(in, currentPos, code, out, tmpoutpos); - break; - case 70: - encode70(in, currentPos, code, out, tmpoutpos); - break; - case 71: - encode71(in, currentPos, code, out, tmpoutpos); - break; - case 72: - encode72(in, currentPos, code, out, tmpoutpos); - break; - case 73: - encode73(in, currentPos, code, out, tmpoutpos); - break; - case 74: - encode74(in, currentPos, code, out, tmpoutpos); - break; - case 75: - encode75(in, currentPos, code, out, tmpoutpos); - break; - case 76: - encode76(in, currentPos, code, out, tmpoutpos); - break; - case 77: - encode77(in, currentPos, code, out, tmpoutpos); - break; - case 78: - encode78(in, currentPos, code, out, tmpoutpos); - break; - case 79: - encode79(in, currentPos, code, out, 
tmpoutpos); - break; - case 80: - encode80(in, currentPos, code, out, tmpoutpos); - break; - default: - throw new RuntimeException("unsupported code"); - }// end switch - tmpoutpos += 2; - currentPos = nextCurrentPos; - } - - outer: while (currentPos < finalin) { - mainloop: for (int selector = 0; selector < 8; selector++) { - int res = 0; - int compressedNum = codeNum[selector]; - if (finalin <= currentPos + compressedNum - 1) - compressedNum = finalin - currentPos; - int b = bitLength[selector]; - int max = 1 << b; - int i = 0; - for (; i < compressedNum; i++) { - if (Util.smallerorequalthan(max, in[currentPos + i])) - continue mainloop; - res = (res << b) + in[currentPos + i]; - } - if (compressedNum != codeNum[selector]) { - res <<= (codeNum[selector] - compressedNum) * b; - } - res |= selector << 28; - out[tmpoutpos++] = res; - - currentPos += compressedNum; - continue outer; - } - final int selector = 8; - out[tmpoutpos++] = in[currentPos++] | (selector << 28); - } - inpos.set(currentPos); - outpos.set(tmpoutpos); - } - - @Override - public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos, int num) { - int currentPos = outpos.get(); - int tmpinpos = inpos.get(); - final int finalout = currentPos + num; - while (currentPos < finalout - 2 * 28) { - - int val = in[tmpinpos++]; - int valn = in[tmpinpos++]; - int header = val >>> 24; - switch (header) { - case 0: { - decode0(val, valn, out, currentPos); - currentPos+=56; - break; - } - case 1: { - decode1(val, valn, out, currentPos); - currentPos+=42; - break; - } - case 2: { - decode2(val, valn, out, currentPos); - currentPos+=37; - break; - } - case 3: { - decode3(val, valn, out, currentPos); - currentPos+=35; - break; - } - case 4: { - decode4(val, valn, out, currentPos); - currentPos+=33; - break; - } - case 5: { - decode5(val, valn, out, currentPos); - currentPos+=32; - break; - } - case 6: { - decode6(val, valn, out, currentPos); - currentPos+=31; - break; - } - 
case 7: { - decode7(val, valn, out, currentPos); - currentPos+=30; - break; - } - case 8: { - decode8(val, valn, out, currentPos); - currentPos+=29; - break; - } - case 9: { - decode9(val, valn, out, currentPos); - currentPos+=42; - break; - } - case 10: { - decode10(val, valn, out, currentPos); - currentPos+=28; - break; - } - case 11: { - decode11(val, valn, out, currentPos); - currentPos+=23; - break; - } - case 12: { - decode12(val, valn, out, currentPos); - currentPos+=21; - break; - } - case 13: { - decode13(val, valn, out, currentPos); - currentPos+=19; - break; - } - case 14: { - decode14(val, valn, out, currentPos); - currentPos+=18; - break; - } - case 15: { - decode15(val, valn, out, currentPos); - currentPos+=17; - break; - } - case 16: { - decode16(val, valn, out, currentPos); - currentPos+=16; - break; - } - case 17: { - decode17(val, valn, out, currentPos); - currentPos+=15; - break; - } - case 18: { - decode18(val, valn, out, currentPos); - currentPos+=37; - break; - } - case 19: { - decode19(val, valn, out, currentPos); - currentPos+=23; - break; - } - case 20: { - decode20(val, valn, out, currentPos); - currentPos+=18; - break; - } - case 21: { - decode21(val, valn, out, currentPos); - currentPos+=16; - break; - } - case 22: { - decode22(val, valn, out, currentPos); - currentPos+=14; - break; - } - case 23: { - decode23(val, valn, out, currentPos); - currentPos+=13; - break; - } - case 24: { - decode24(val, valn, out, currentPos); - currentPos+=12; - break; - } - case 25: { - decode25(val, valn, out, currentPos); - currentPos+=11; - break; - } - case 26: { - decode26(val, valn, out, currentPos); - currentPos+=10; - break; - } - case 27: { - decode27(val, valn, out, currentPos); - currentPos+=35; - break; - } - case 28: { - decode28(val, valn, out, currentPos); - currentPos+=21; - break; - } - case 29: { - decode29(val, valn, out, currentPos); - currentPos+=16; - break; - } - - case 30: { - decode30(val, valn, out, currentPos); - currentPos+=14; - 
break; - } - case 31: { - decode31(val, valn, out, currentPos); - currentPos+=12; - break; - } - case 32: { - decode32(val, valn, out, currentPos); - currentPos+=11; - break; - } - case 33: { - decode33(val, valn, out, currentPos); - currentPos+=10; - break; - } - case 34: { - decode34(val, valn, out, currentPos); - currentPos+=9; - break; - } - case 35: { - decode35(val, valn, out, currentPos); - currentPos+=8; - break; - } - case 36: { - decode36(val, valn, out, currentPos); - currentPos+=33; - break; - } - case 37: { - decode37(val, valn, out, currentPos); - currentPos+=19; - break; - } - case 38: { - decode38(val, valn, out, currentPos); - currentPos+=14; - break; - } - case 39: { - decode39(val, valn, out, currentPos); - currentPos+=12; - break; - } - case 40: { - decode40(val, valn, out, currentPos); - currentPos+=10; - break; - } - case 41: { - decode41(val, valn, out, currentPos); - currentPos+=9; - break; - } - case 42: { - decode42(val, valn, out, currentPos); - currentPos+=8; - break; - } - case 43: { - decode43(val, valn, out, currentPos); - currentPos+=7; - break; - } - case 44: { - decode44(val, valn, out, currentPos); - currentPos+=6; - break; - } - case 45: { - decode45(val, valn, out, currentPos); - currentPos+=32; - break; - } - case 46: { - decode46(val, valn, out, currentPos); - currentPos+=18; - break; - } - case 47: { - decode47(val, valn, out, currentPos); - currentPos+=13; - break; - } - case 48: { - decode48(val, valn, out, currentPos); - currentPos+=11; - break; - } - case 49: { - decode49(val, valn, out, currentPos); - currentPos+=9; - break; - } - case 50: { - decode50(val, valn, out, currentPos); - currentPos+=8; - break; - } - case 51: { - decode51(val, valn, out, currentPos); - currentPos+=7; - break; - } - case 52: { - decode52(val, valn, out, currentPos); - currentPos+=6; - break; - } - case 53: { - decode53(val, valn, out, currentPos); - currentPos+=5; - break; - } - case 54: { - decode54(val, valn, out, currentPos); - 
currentPos+=31; - break; - } - case 55: { - decode55(val, valn, out, currentPos); - currentPos+=17; - break; - } - case 56: { - decode56(val, valn, out, currentPos); - currentPos+=12; - break; - } - case 57: { - decode57(val, valn, out, currentPos); - currentPos+=10; - break; - } - case 58: { - decode58(val, valn, out, currentPos); - currentPos+=8; - break; - } - case 59: { - decode59(val, valn, out, currentPos); - currentPos+=7; - break; - } - case 60: { - decode60(val, valn, out, currentPos); - currentPos+=6; - break; - } - case 61: { - decode61(val, valn, out, currentPos); - currentPos+=5; - break; - } - case 62: { - decode62(val, valn, out, currentPos); - currentPos+=4; - break; - } - case 63: { - decode63(val, valn, out, currentPos); - currentPos+=30; - break; - } - case 64: { - decode64(val, valn, out, currentPos); - currentPos+=16; - break; - } - case 65: { - decode65(val, valn, out, currentPos); - currentPos+=11; - break; - } - case 66: { - decode66(val, valn, out, currentPos); - currentPos+=9; - break; - } - case 67: { - decode67(val, valn, out, currentPos); - currentPos+=7; - break; - } - case 68: { - decode68(val, valn, out, currentPos); - currentPos+=6; - break; - } - case 69: { - decode69(val, valn, out, currentPos); - currentPos+=5; - break; - } - case 70: { - decode70(val, valn, out, currentPos); - currentPos+=4; - break; - } - case 71: { - decode71(val, valn, out, currentPos); - currentPos+=3; - break; - } - case 72: { - decode72(val, valn, out, currentPos); - currentPos+=29; - break; - } - case 73: { - decode73(val, valn, out, currentPos); - currentPos+=15; - break; - } - case 74: { - decode74(val, valn, out, currentPos); - currentPos+=10; - break; - } - case 75: { - decode75(val, valn, out, currentPos); - currentPos+=8; - break; - } - case 76: { - decode76(val, valn, out, currentPos); - currentPos+=6; - break; - } - case 77: { - decode77(val, valn, out, currentPos); - currentPos+=5; - break; - } - case 78: { - decode78(val, valn, out, currentPos); 
- currentPos+=4; - break; - } - case 79: { - decode79(val, valn, out, currentPos); - currentPos+=3; - break; - } - case 80: { - decode80(val, valn, out, currentPos); - currentPos+=2; - break; - } - default: - throw new RuntimeException("Wrong code: " + header); - }// end switch - } // end while - - while (currentPos < finalout) { - int val = in[tmpinpos++]; - int header = val >>> 28; - switch (header) { - case 0: { // number : 28, bitwidth : 1 - final int howmany = finalout - currentPos < 28 ? finalout - currentPos : 28; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (k + 4)) >>> 31; - } - break; - } - case 1: { // number : 14, bitwidth : 2 - final int howmany = finalout - currentPos < 14 ? finalout - currentPos : 14; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (2 * k + 4)) >>> 30; - } - break; - } - case 2: { // number : 9, bitwidth : 3 - final int howmany = finalout - currentPos < 9 ? finalout - currentPos : 9; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (3 * k + 5)) >>> 29; - } - break; - } - case 3: { // number : 7, bitwidth : 4 - final int howmany = finalout - currentPos < 7 ? finalout - currentPos : 7; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (4 * k + 4)) >>> 28; - } - break; - } - case 4: { // number : 5, bitwidth : 5 - final int howmany = finalout - currentPos < 5 ? finalout - currentPos : 5; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (5 * k + 7)) >>> 27; - } - break; - } - case 5: { // number : 4, bitwidth : 7 - final int howmany = finalout - currentPos < 4 ? finalout - currentPos : 4; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (7 * k + 4)) >>> 25; - } - break; - } - case 6: { // number : 3, bitwidth : 9 - final int howmany = finalout - currentPos < 3 ? 
finalout - currentPos : 3; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (9 * k + 5)) >>> 23; - } - break; - } - case 7: { // number : 2, bitwidth : 14 - final int howmany = finalout - currentPos < 2 ? finalout - currentPos : 2; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (14 * k + 4)) >>> 18; - } - break; - } - case 8: { // number : 1, bitwidth : 28 - out[currentPos++] = (val << 4) >>> 4; - break; - } - default: { - throw new RuntimeException("shouldn't happen"); - } - } - } - - outpos.set(finalout); - inpos.set(tmpinpos); - - } -} \ No newline at end of file + private static final int[][] M = { { 0, 1, 2, 3, 4, 5, 6, 7, 8 }, { 9, 10, 11, 12, 13, 14, 15, 16, 17 }, + { 18, 19, 20, 21, 22, 23, 24, 25, 26 }, { 27, 28, 29, 30, 31, 32, 33, 34, 35 }, + { 36, 37, 38, 39, 40, 41, 42, 43, 44 }, { 45, 46, 47, 48, 49, 50, 51, 52, 53 }, + { 54, 55, 56, 57, 58, 59, 60, 61, 62 }, { 63, 64, 65, 66, 67, 68, 69, 70, 71 }, + { 72, 73, 74, 75, 76, 77, 78, 79, 80 } }; + + @Override + public void compress(int[] in, IntWrapper inpos, int inlength, int out[], IntWrapper outpos) { + if (inlength == 0) + return; + out[outpos.get()] = inlength; + outpos.increment(); + headlessCompress(in, inpos, inlength, out, outpos); + } + + private void encode0(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 24; i++) + out[outf + 0] = (out[outf + 0] << 1) + (in[inf + i]); + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; + for (int i = 0; i < 28; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 28 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode1(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 24; i++) + out[outf + 0] = (out[outf + 0] << 1) + in[inf + i]; + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; + for (int i = 0; i < 14; 
i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 28 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode2(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 24; i++) + out[outf + 0] = (out[outf + 0] << 1) + in[inf + i]; + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; + for (int i = 0; i < 9; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 28 + i];// 第二个28位是低位存储的,所以浪费的1比特在最顶端。 + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode3(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 24; i++) + out[outf + 0] = (out[outf + 0] << 1) + in[inf + i]; + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; + for (int i = 0; i < 7; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 28 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode4(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 24; i++) + out[outf + 0] = (out[outf + 0] << 1) + in[inf + i]; + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; + for (int i = 0; i < 5; i++) + out[outf + 1] = (out[outf + 1] << 5) + in[inf + 28 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode5(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 24; i++) + out[outf + 0] = (out[outf + 0] << 1) + in[inf + i]; + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 7) + in[inf + 28 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode6(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 24; i++) + out[outf + 0] = 
(out[outf + 0] << 1) + in[inf + i]; + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; + for (int i = 0; i < 3; i++) + out[outf + 1] = (out[outf + 1] << 9) + in[inf + 28 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode7(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 24; i++) + out[outf + 0] = (out[outf + 0] << 1) + in[inf + i]; + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 14) + in[inf + 28 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode8(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 24; i++) + out[outf + 0] = (out[outf + 0] << 1) + in[inf + i]; + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 24 + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 28) + in[inf + 28 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode9(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 12; i++) + out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; + for (int i = 0; i < 28; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 14 + i]; + + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode10(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 12; i++) { + out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; + + } + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; + for (int i = 0; i < 14; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 14 + i]; + + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void 
encode11(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 12; i++) + out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; + for (int i = 0; i < 9; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 14 + i]; + + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode12(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 12; i++) + out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; + for (int i = 0; i < 7; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 14 + i]; + + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode13(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 12; i++) + out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; + for (int i = 0; i < 5; i++) + out[outf + 1] = (out[outf + 1] << 5) + in[inf + 14 + i]; + + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode14(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 12; i++) + out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 7) + in[inf + 14 + i]; + + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode15(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 12; i++) + out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; + for (int i = 0; i < 3; 
i++) + out[outf + 1] = (out[outf + 1] << 9) + in[inf + 14 + i]; + + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode16(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 12; i++) + out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 14) + in[inf + 14 + i]; + + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode17(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 12; i++) + out[outf + 0] = (out[outf + 0] << 2) + in[inf + i]; + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 12 + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 28) + in[inf + 14 + i]; + + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode18(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 8; i++) + out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; + for (int i = 0; i < 28; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 9 + i]; + + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode19(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 8; i++) + out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; + for (int i = 0; i < 14; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 9 + i]; + + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode20(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 8; i++) + out[outf + 0] = (out[outf + 0] << 
3) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; + for (int i = 0; i < 9; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 9 + i]; + + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode21(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 8; i++) + out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; + for (int i = 0; i < 7; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 9 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode22(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 8; i++) + out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; + for (int i = 0; i < 5; i++) + out[outf + 1] = (out[outf + 1] << 5) + in[inf + 9 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode23(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 8; i++) + out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 7) + in[inf + 9 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode24(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 8; i++) + out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; + for (int i = 0; i < 3; i++) + out[outf + 1] = (out[outf + 1] << 9) + in[inf + 9 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode25(final int[] in, final int inf, final 
int code, final int[] out, + final int outf) { + for (int i = 0; i < 8; i++) + out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 14) + in[inf + 9 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode26(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 8; i++) + out[outf + 0] = (out[outf + 0] << 3) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 8 + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 28) + in[inf + 9 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode27(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 6; i++) + out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; + for (int i = 0; i < 28; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 7 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode28(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 6; i++) + out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; + for (int i = 0; i < 14; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 7 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode29(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 6; i++) + out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; + for (int i = 0; i < 9; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 7 + i]; 
+ out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode30(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 6; i++) + out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; + for (int i = 0; i < 7; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 7 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode31(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 6; i++) + out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; + for (int i = 0; i < 5; i++) + out[outf + 1] = (out[outf + 1] << 5) + in[inf + 7 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode32(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 6; i++) + out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 7) + in[inf + 7 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode33(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 6; i++) + out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; + for (int i = 0; i < 3; i++) + out[outf + 1] = (out[outf + 1] << 9) + in[inf + 7 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode34(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 6; i++) + out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 
4) + in[inf + 6 + i]; + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 14) + in[inf + 7 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode35(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 6; i++) + out[outf + 0] = (out[outf + 0] << 4) + in[inf + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 6 + i]; + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 28) + in[inf + 7 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode36(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 4; i++) + out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); + out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); + for (int i = 0; i < 28; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 5 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode37(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 4; i++) + out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); + out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); + for (int i = 0; i < 14; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 5 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode38(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 4; i++) + out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); + out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); + for (int i = 0; i < 9; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 5 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + 
private void encode39(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 4; i++) + out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); + out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); + for (int i = 0; i < 7; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 5 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode40(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 4; i++) + out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); + out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); + for (int i = 0; i < 5; i++) + out[outf + 1] = (out[outf + 1] << 5) + in[inf + 5 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode41(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 4; i++) + out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); + out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 7) + in[inf + 5 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode42(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 4; i++) + out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); + out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); + for (int i = 0; i < 3; i++) + out[outf + 1] = (out[outf + 1] << 9) + in[inf + 5 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode43(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 4; 
i++) + out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); + out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 14) + in[inf + 5 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode44(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 4; i++) + out[outf + 0] = (out[outf + 0] << 5) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 4) + (in[inf + 4] >>> 1); + out[outf + 1] = (out[outf + 1] << 1) + ((in[inf + 4] << 31) >>> 31); + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 28) + in[inf + 5 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode45(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 3; i++) + out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); + for (int i = 0; i < 28; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 4 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode46(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 3; i++) + out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); + for (int i = 0; i < 14; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 4 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode47(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 3; i++) + out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); + 
out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); + for (int i = 0; i < 9; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 4 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode48(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 3; i++) + out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); + for (int i = 0; i < 7; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 4 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode49(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 3; i++) + out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); + for (int i = 0; i < 5; i++) + out[outf + 1] = (out[outf + 1] << 5) + in[inf + 4 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode50(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 3; i++) + out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 7) + in[inf + 4 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode51(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 3; i++) + out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); + for (int i = 0; i < 3; i++) + out[outf + 1] = (out[outf + 1] 
<< 9) + in[inf + 4 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode52(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 3; i++) + out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 14) + in[inf + 4 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode53(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 3; i++) + out[outf + 0] = (out[outf + 0] << 7) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 3) + (in[inf + 3] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 3] << 28) >>> 28); + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 28) + in[inf + 4 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode54(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 2; i++) + out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); + out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); + for (int i = 0; i < 28; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 3 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode55(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 2; i++) + out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); + out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); + for (int i = 0; i < 14; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 3 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode56(final int[] in, final 
int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 2; i++) + out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); + out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); + for (int i = 0; i < 9; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 3 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode57(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 2; i++) + out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); + out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); + for (int i = 0; i < 7; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 3 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode58(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 2; i++) + out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); + out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); + for (int i = 0; i < 5; i++) + out[outf + 1] = (out[outf + 1] << 5) + in[inf + 3 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode59(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 2; i++) + out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); + out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 7) + in[inf + 3 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode60(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 2; i++) + out[outf + 0] = (out[outf + 0] << 9) 
+ in[inf + i]; + out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); + out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); + for (int i = 0; i < 3; i++) + out[outf + 1] = (out[outf + 1] << 9) + in[inf + 3 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode61(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 2; i++) + out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); + out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 14) + in[inf + 3 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode62(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + for (int i = 0; i < 2; i++) + out[outf + 0] = (out[outf + 0] << 9) + in[inf + i]; + out[outf + 0] = (out[outf + 0] << 6) + (in[inf + 2] >>> 3); + out[outf + 1] = (out[outf + 1] << 3) + ((in[inf + 2] << 29) >>> 29); + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 28) + in[inf + 3 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode63(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + + out[outf + 0] = (out[outf + 0] << 14) + in[inf]; + out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); + for (int i = 0; i < 28; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 2 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode64(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + out[outf + 0] = (out[outf + 0] << 14) + in[inf]; + out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); + for (int i = 0; i < 14; i++) + out[outf 
+ 1] = (out[outf + 1] << 2) + in[inf + 2 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode65(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + out[outf + 0] = (out[outf + 0] << 14) + in[inf]; + out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); + for (int i = 0; i < 9; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 2 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode66(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + out[outf + 0] = (out[outf + 0] << 14) + in[inf]; + out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); + for (int i = 0; i < 7; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 2 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode67(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + out[outf + 0] = (out[outf + 0] << 14) + in[inf]; + out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); + for (int i = 0; i < 5; i++) + out[outf + 1] = (out[outf + 1] << 5) + in[inf + 2 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode68(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + out[outf + 0] = (out[outf + 0] << 14) + in[inf]; + out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 7) + in[inf + 2 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode69(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + out[outf + 0] = (out[outf + 0] << 14) + in[inf]; 
+ out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); + for (int i = 0; i < 3; i++) + out[outf + 1] = (out[outf + 1] << 9) + in[inf + 2 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode70(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + out[outf + 0] = (out[outf + 0] << 14) + in[inf]; + out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 14) + in[inf + 2 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode71(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + out[outf + 0] = (out[outf + 0] << 14) + in[inf]; + out[outf + 0] = (out[outf + 0] << 10) + (in[inf + 1] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf + 1] << 28) >>> 28); + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 28) + in[inf + 2 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode72(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + + out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); + for (int i = 0; i < 28; i++) + out[outf + 1] = (out[outf + 1] << 1) + in[inf + 1 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode73(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); + for (int i = 0; i < 14; i++) + out[outf + 1] = (out[outf + 1] << 2) + in[inf + 1 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode74(final int[] in, final int inf, final int code, final int[] out, + final 
int outf) { + out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); + for (int i = 0; i < 9; i++) + out[outf + 1] = (out[outf + 1] << 3) + in[inf + 1 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode75(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); + for (int i = 0; i < 7; i++) + out[outf + 1] = (out[outf + 1] << 4) + in[inf + 1 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode76(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); + for (int i = 0; i < 5; i++) + out[outf + 1] = (out[outf + 1] << 5) + in[inf + 1 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode77(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); + for (int i = 0; i < 4; i++) + out[outf + 1] = (out[outf + 1] << 7) + in[inf + 1 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode78(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); + for (int i = 0; i < 3; i++) + out[outf + 1] = (out[outf + 1] << 9) + in[inf + 1 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode79(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + 
((in[inf] << 28) >>> 28); + for (int i = 0; i < 2; i++) + out[outf + 1] = (out[outf + 1] << 14) + in[inf + 1 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + private void encode80(final int[] in, final int inf, final int code, final int[] out, + final int outf) { + out[outf + 0] = (out[outf + 0] << 24) + (in[inf] >>> 4); + out[outf + 1] = (out[outf + 1] << 4) + ((in[inf] << 28) >>> 28); + for (int i = 0; i < 1; i++) + out[outf + 1] = (out[outf + 1] << 28) + in[inf + 1 + i]; + out[outf + 0] = code << 24 | out[outf + 0]; + + } + + @Override + public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { + if (inlength == 0) + return; + final int outlength = in[inpos.get()]; + inpos.increment(); + headlessUncompress(in, inpos, inlength, out, outpos, outlength); + } + + + + private void decode80(int val, int valn, int[] out, int currentPos) { + // number : 1, bitwidth : 28 + out[currentPos++] = (val << 8) >>> 4 | (valn >>> 28); + // number : 1, bitwidth : 28 + out[currentPos++] = (valn << 4) >>> 4; + } + + private void decode79(int val, int valn, int[] out, int currentPos) { + // number : 1, bitwidth : 28 + out[currentPos++] = (val << 8) >>> 4 | (valn >>> 28); + // number :2, bitwidth : 14 + out[currentPos++] = (valn << 4) >>> 18; + out[currentPos++] = (valn << 18) >>> 18; + } + + private void decode78(int val, int valn, int[] out, int currentPos) { + // number : 1, bitwidth : 28 + out[currentPos++] = (val << 8) >>> 4 | (valn >>> 27); + // number : 3, bitwidth :9 + out[currentPos++] = (valn << 5) >>> 23; + out[currentPos++] = (valn << 14) >>> 23; + out[currentPos++] = (valn << 23) >>> 23; + } + + private void decode77(int val, int valn, int[] out, int currentPos) { + // number : 1, bitwidth : 28 + out[currentPos++] = (val << 8) >>> 4 | (valn >>> 28); + // number : 4, bitwidth : 7 + out[currentPos++] = (valn << 4) >>> 25; + out[currentPos++] = (valn << 11) >>> 25; + out[currentPos++] = (valn << 18) >>> 25; + 
out[currentPos++] = (valn << 25) >>> 25; + } + + private void decode76(int val, int valn, int[] out, int currentPos) { + // number : 5, bitwidth : 5 + out[currentPos++] = (val << 8) >>> 4 | (valn >>> 25); + // number : 14, bitwidth : 2 + out[currentPos++] = (valn << 7) >>> 27; + out[currentPos++] = (valn << 12) >>> 27; + out[currentPos++] = (valn << 17) >>> 27; + out[currentPos++] = (valn << 22) >>> 27; + out[currentPos++] = (valn << 27) >>> 27; + } + + private void decode75(int val, int valn, int[] out, int currentPos) { + // number : 1, bitwidth : 28 + out[currentPos++] = (val << 8) >>> 4 | (valn >>> 28); + // number : 7, bitwidth : 4 + out[currentPos++] = (valn << 4) >>> 28; + out[currentPos++] = (valn << 8) >>> 28; + out[currentPos++] = (valn << 12) >>> 28; + out[currentPos++] = (valn << 16) >>> 28; + out[currentPos++] = (valn << 20) >>> 28; + out[currentPos++] = (valn << 24) >>> 28; + out[currentPos++] = (valn << 28) >>> 28; + } + + private void decode74(int val, int valn, int[] out, int currentPos) { + // number : 1, bitwidth : 28 + out[currentPos++] = (val << 8) >>> 4 | (valn >>> 27); + // number : 9, bitwidth : 3 + out[currentPos++] = (valn << 5) >>> 29; + out[currentPos++] = (valn << 8) >>> 29; + out[currentPos++] = (valn << 11) >>> 29; + out[currentPos++] = (valn << 14) >>> 29; + out[currentPos++] = (valn << 17) >>> 29; + out[currentPos++] = (valn << 20) >>> 29; + out[currentPos++] = (valn << 23) >>> 29; + out[currentPos++] = (valn << 26) >>> 29; + out[currentPos++] = (valn << 29) >>> 29; + } + + private void decode73(int val, int valn, int[] out, int currentPos) { + // number : 1, bitwidth : 28 + out[currentPos++] = (val << 8) >>> 4 | (valn >>> 28); + // number : 14, bitwidth : 2 + out[currentPos++] = (valn << 4) >>> 30; + out[currentPos++] = (valn << 6) >>> 30; + out[currentPos++] = (valn << 8) >>> 30; + out[currentPos++] = (valn << 10) >>> 30; + out[currentPos++] = (valn << 12) >>> 30; + out[currentPos++] = (valn << 14) >>> 30; + out[currentPos++] = 
(valn << 16) >>> 30; + out[currentPos++] = (valn << 18) >>> 30; + out[currentPos++] = (valn << 20) >>> 30; + out[currentPos++] = (valn << 22) >>> 30; // 10 + out[currentPos++] = (valn << 24) >>> 30; + out[currentPos++] = (valn << 26) >>> 30; + out[currentPos++] = (valn << 28) >>> 30; + out[currentPos++] = (valn << 30) >>> 30; + } + + private void decode72(int val, int valn, int[] out, int currentPos) { + // number : 1, bitwidth : 28 + out[currentPos++] = (val << 8) >>> 4 | (valn >>> 28); + // number : 28, bitwidth : 1 + out[currentPos++] = (valn << 4) >>> 31; + out[currentPos++] = (valn << 5) >>> 31; + out[currentPos++] = (valn << 6) >>> 31; + out[currentPos++] = (valn << 7) >>> 31; + out[currentPos++] = (valn << 8) >>> 31; + out[currentPos++] = (valn << 9) >>> 31; + out[currentPos++] = (valn << 10) >>> 31; + out[currentPos++] = (valn << 11) >>> 31; + out[currentPos++] = (valn << 12) >>> 31; + out[currentPos++] = (valn << 13) >>> 31; // 10 + out[currentPos++] = (valn << 14) >>> 31; + out[currentPos++] = (valn << 15) >>> 31; + out[currentPos++] = (valn << 16) >>> 31; + out[currentPos++] = (valn << 17) >>> 31; + out[currentPos++] = (valn << 18) >>> 31; + out[currentPos++] = (valn << 19) >>> 31; + out[currentPos++] = (valn << 20) >>> 31; + out[currentPos++] = (valn << 21) >>> 31; + out[currentPos++] = (valn << 22) >>> 31; + out[currentPos++] = (valn << 23) >>> 31; // 20 + out[currentPos++] = (valn << 24) >>> 31; + out[currentPos++] = (valn << 25) >>> 31; + out[currentPos++] = (valn << 26) >>> 31; + out[currentPos++] = (valn << 27) >>> 31; + out[currentPos++] = (valn << 28) >>> 31; + out[currentPos++] = (valn << 29) >>> 31; + out[currentPos++] = (valn << 30) >>> 31; + out[currentPos++] = (valn << 31) >>> 31; + } + + private void decode71(int val, int valn, int[] out, int currentPos) { + // number : 2, bitwidth : 14 + out[currentPos++] = (val << 8) >>> 18; + out[currentPos++] = (val << 22) >>> 18 | (valn >>> 28); + // number : 1, bitwidth : 28 + out[currentPos++] = 
(valn << 4) >>> 4; + } + + private void decode70(int val, int valn, int[] out, int currentPos) { + // number : 2, bitwidth : 14 + out[currentPos++] = (val << 8) >>> 18; + out[currentPos++] = (val << 22) >>> 18 | (valn >>> 28); + // number : 2, bitwidth : 14 + out[currentPos++] = (valn << 4) >>> 18; + out[currentPos++] = (valn << 18) >>> 18; + } + + private void decode69(int val, int valn, int[] out, int currentPos) { + // number : 2, bitwidth : 14 + out[currentPos++] = (val << 8) >>> 18; + out[currentPos++] = (val << 22) >>> 18 | (valn >>> 27); + // number : 3, bitwidth : 9 + out[currentPos++] = (valn << 5) >>> 23; + out[currentPos++] = (valn << 14) >>> 23; + out[currentPos++] = (valn << 23) >>> 23; + } + + private void decode68(int val, int valn, int[] out, int currentPos) { + // number : 2, bitwidth : 14 + out[currentPos++] = (val << 8) >>> 18; + out[currentPos++] = (val << 22) >>> 18 | (valn >>> 28); + // number : 4, bitwidth : 7 + out[currentPos++] = (valn << 4) >>> 25; + out[currentPos++] = (valn << 11) >>> 25; + out[currentPos++] = (valn << 18) >>> 25; + out[currentPos++] = (valn << 25) >>> 25; + } + + private void decode67(int val, int valn, int[] out, int currentPos) { + // number : 2, bitwidth : 14 + out[currentPos++] = (val << 8) >>> 18; + out[currentPos++] = (val << 22) >>> 18 | (valn >>> 25); + // number : 5, bitwidth : 5 + out[currentPos++] = (valn << 7) >>> 27; + out[currentPos++] = (valn << 12) >>> 27; + out[currentPos++] = (valn << 17) >>> 27; + out[currentPos++] = (valn << 22) >>> 27; + out[currentPos++] = (valn << 27) >>> 27; + } + + private void decode66(int val, int valn, int[] out, int currentPos) { + // number : 2, bitwidth : 14 + out[currentPos++] = (val << 8) >>> 18; + out[currentPos++] = (val << 22) >>> 18 | (valn >>> 28); + // number : 7, bitwidth : 4 + out[currentPos++] = (valn << 4) >>> 28; + out[currentPos++] = (valn << 8) >>> 28; + out[currentPos++] = (valn << 12) >>> 28; + out[currentPos++] = (valn << 16) >>> 28; + out[currentPos++] = 
(valn << 20) >>> 28; + out[currentPos++] = (valn << 24) >>> 28; + out[currentPos++] = (valn << 28) >>> 28; + } + + private void decode65(int val, int valn, int[] out, int currentPos) { + // number : 2, bitwidth : 14 + out[currentPos++] = (val << 8) >>> 18; + out[currentPos++] = (val << 22) >>> 18 | (valn >>> 27); + // number : 9, bitwidth : 3 + out[currentPos++] = (valn << 5) >>> 29; + out[currentPos++] = (valn << 8) >>> 29; + out[currentPos++] = (valn << 11) >>> 29; + out[currentPos++] = (valn << 14) >>> 29; + out[currentPos++] = (valn << 17) >>> 29; + out[currentPos++] = (valn << 20) >>> 29; + out[currentPos++] = (valn << 23) >>> 29; + out[currentPos++] = (valn << 26) >>> 29; + out[currentPos++] = (valn << 29) >>> 29; + } + + private void decode64(int val, int valn, int[] out, int currentPos) { + // number : 2, bitwidth : 14 + out[currentPos++] = (val << 8) >>> 18; + out[currentPos++] = (val << 22) >>> 18 | (valn >>> 28); + // number : 14, bitwidth : 2 + out[currentPos++] = (valn << 4) >>> 30; + out[currentPos++] = (valn << 6) >>> 30; + out[currentPos++] = (valn << 8) >>> 30; + out[currentPos++] = (valn << 10) >>> 30; + out[currentPos++] = (valn << 12) >>> 30; + out[currentPos++] = (valn << 14) >>> 30; + out[currentPos++] = (valn << 16) >>> 30; + out[currentPos++] = (valn << 18) >>> 30; + out[currentPos++] = (valn << 20) >>> 30; + out[currentPos++] = (valn << 22) >>> 30; // 10 + out[currentPos++] = (valn << 24) >>> 30; + out[currentPos++] = (valn << 26) >>> 30; + out[currentPos++] = (valn << 28) >>> 30; + out[currentPos++] = (valn << 30) >>> 30; + } + + private void decode63(int val, int valn, int[] out, int currentPos) { + // number : 2, bitwidth : 14 + out[currentPos++] = (val << 8) >>> 18; + out[currentPos++] = (val << 22) >>> 18 | (valn >>> 28); + // number : 28, bitwidth : 1 + out[currentPos++] = (valn << 4) >>> 31; + out[currentPos++] = (valn << 5) >>> 31; + out[currentPos++] = (valn << 6) >>> 31; + out[currentPos++] = (valn << 7) >>> 31; + 
out[currentPos++] = (valn << 8) >>> 31; + out[currentPos++] = (valn << 9) >>> 31; + out[currentPos++] = (valn << 10) >>> 31; + out[currentPos++] = (valn << 11) >>> 31; + out[currentPos++] = (valn << 12) >>> 31; + out[currentPos++] = (valn << 13) >>> 31; // 10 + out[currentPos++] = (valn << 14) >>> 31; + out[currentPos++] = (valn << 15) >>> 31; + out[currentPos++] = (valn << 16) >>> 31; + out[currentPos++] = (valn << 17) >>> 31; + out[currentPos++] = (valn << 18) >>> 31; + out[currentPos++] = (valn << 19) >>> 31; + out[currentPos++] = (valn << 20) >>> 31; + out[currentPos++] = (valn << 21) >>> 31; + out[currentPos++] = (valn << 22) >>> 31; + out[currentPos++] = (valn << 23) >>> 31; // 20 + out[currentPos++] = (valn << 24) >>> 31; + out[currentPos++] = (valn << 25) >>> 31; + out[currentPos++] = (valn << 26) >>> 31; + out[currentPos++] = (valn << 27) >>> 31; + out[currentPos++] = (valn << 28) >>> 31; + out[currentPos++] = (valn << 29) >>> 31; + out[currentPos++] = (valn << 30) >>> 31; + out[currentPos++] = (valn << 31) >>> 31; + } + + private void decode62(int val, int valn, int[] out, int currentPos) { + // number : 3, bitwidth : 9 + out[currentPos++] = (val << 8) >>> 23; + out[currentPos++] = (val << 17) >>> 23; + out[currentPos++] = (val << 26) >>> 23 | (valn >>> 28); + // number : 1, bitwidth : 28 + out[currentPos++] = (valn << 4) >>> 4; + } + + private void decode61(int val, int valn, int[] out, int currentPos) { + // number : 3, bitwidth : 9 + out[currentPos++] = (val << 8) >>> 23; + out[currentPos++] = (val << 17) >>> 23; + out[currentPos++] = (val << 26) >>> 23 | (valn >>> 28); + // number : 2, bitwidth : 14 + out[currentPos++] = (valn << 4) >>> 18; + out[currentPos++] = (valn << 18) >>> 18; + } + + private void decode60(int val, int valn, int[] out, int currentPos) { + // number : 3, bitwidth : 9 + out[currentPos++] = (val << 8) >>> 23; + out[currentPos++] = (val << 17) >>> 23; + out[currentPos++] = (val << 26) >>> 23 | (valn >>> 27); + // number : 3, 
bitwidth : 9 + out[currentPos++] = (valn << 5) >>> 23; + out[currentPos++] = (valn << 14) >>> 23; + out[currentPos++] = (valn << 23) >>> 23; + } + + private void decode59(int val, int valn, int[] out, int currentPos) { + // number : 3, bitwidth : 9 + out[currentPos++] = (val << 8) >>> 23; + out[currentPos++] = (val << 17) >>> 23; + out[currentPos++] = (val << 26) >>> 23 | (valn >>> 28); + // number : 4, bitwidth : 7 + out[currentPos++] = (valn << 4) >>> 25; + out[currentPos++] = (valn << 11) >>> 25; + out[currentPos++] = (valn << 18) >>> 25; + out[currentPos++] = (valn << 25) >>> 25; + } + + private void decode58(int val, int valn, int[] out, int currentPos) { + // number : 3, bitwidth : 9 + out[currentPos++] = (val << 8) >>> 23; + out[currentPos++] = (val << 17) >>> 23; + out[currentPos++] = (val << 26) >>> 23 | (valn >>> 25); + // number : 5, bitwidth : 5 + out[currentPos++] = (valn << 7) >>> 27; + out[currentPos++] = (valn << 12) >>> 27; + out[currentPos++] = (valn << 17) >>> 27; + out[currentPos++] = (valn << 22) >>> 27; + out[currentPos++] = (valn << 27) >>> 27; + } + + private void decode57(int val, int valn, int[] out, int currentPos) { + // number : 3, bitwidth : 9 + out[currentPos++] = (val << 8) >>> 23; + out[currentPos++] = (val << 17) >>> 23; + out[currentPos++] = (val << 26) >>> 23 | (valn >>> 28); + // number : 7, bitwidth : 4 + out[currentPos++] = (valn << 4) >>> 28; + out[currentPos++] = (valn << 8) >>> 28; + out[currentPos++] = (valn << 12) >>> 28; + out[currentPos++] = (valn << 16) >>> 28; + out[currentPos++] = (valn << 20) >>> 28; + out[currentPos++] = (valn << 24) >>> 28; + out[currentPos++] = (valn << 28) >>> 28; + } + + private void decode56(int val, int valn, int[] out, int currentPos) { + // number : 3, bitwidth : 9 + out[currentPos++] = (val << 8) >>> 23; + out[currentPos++] = (val << 17) >>> 23; + out[currentPos++] = (val << 26) >>> 23 | (valn >>> 27); + // number : 9, bitwidth : 3 + out[currentPos++] = (valn << 5) >>> 29; + 
out[currentPos++] = (valn << 8) >>> 29; + out[currentPos++] = (valn << 11) >>> 29; + out[currentPos++] = (valn << 14) >>> 29; + out[currentPos++] = (valn << 17) >>> 29; + out[currentPos++] = (valn << 20) >>> 29; + out[currentPos++] = (valn << 23) >>> 29; + out[currentPos++] = (valn << 26) >>> 29; + out[currentPos++] = (valn << 29) >>> 29; + } + + private void decode55(int val, int valn, int[] out, int currentPos) { + // number : 3, bitwidth : 9 + out[currentPos++] = (val << 8) >>> 23; + out[currentPos++] = (val << 17) >>> 23; + out[currentPos++] = (val << 26) >>> 23 | (valn >>> 28); + // number : 14, bitwidth : 2 + out[currentPos++] = (valn << 4) >>> 30; + out[currentPos++] = (valn << 6) >>> 30; + out[currentPos++] = (valn << 8) >>> 30; + out[currentPos++] = (valn << 10) >>> 30; + out[currentPos++] = (valn << 12) >>> 30; + out[currentPos++] = (valn << 14) >>> 30; + out[currentPos++] = (valn << 16) >>> 30; + out[currentPos++] = (valn << 18) >>> 30; + out[currentPos++] = (valn << 20) >>> 30; + out[currentPos++] = (valn << 22) >>> 30; // 10 + out[currentPos++] = (valn << 24) >>> 30; + out[currentPos++] = (valn << 26) >>> 30; + out[currentPos++] = (valn << 28) >>> 30; + out[currentPos++] = (valn << 30) >>> 30; + } + + private void decode54(int val, int valn, int[] out, int currentPos) { + // number : 3, bitwidth : 9 + out[currentPos++] = (val << 8) >>> 23; + out[currentPos++] = (val << 17) >>> 23; + out[currentPos++] = (val << 26) >>> 23 | (valn >>> 28); + // number : 28, bitwidth : 1 + out[currentPos++] = (valn << 4) >>> 31; + out[currentPos++] = (valn << 5) >>> 31; + out[currentPos++] = (valn << 6) >>> 31; + out[currentPos++] = (valn << 7) >>> 31; + out[currentPos++] = (valn << 8) >>> 31; + out[currentPos++] = (valn << 9) >>> 31; + out[currentPos++] = (valn << 10) >>> 31; + out[currentPos++] = (valn << 11) >>> 31; + out[currentPos++] = (valn << 12) >>> 31; + out[currentPos++] = (valn << 13) >>> 31; // 10 + out[currentPos++] = (valn << 14) >>> 31; + out[currentPos++] 
= (valn << 15) >>> 31; + out[currentPos++] = (valn << 16) >>> 31; + out[currentPos++] = (valn << 17) >>> 31; + out[currentPos++] = (valn << 18) >>> 31; + out[currentPos++] = (valn << 19) >>> 31; + out[currentPos++] = (valn << 20) >>> 31; + out[currentPos++] = (valn << 21) >>> 31; + out[currentPos++] = (valn << 22) >>> 31; + out[currentPos++] = (valn << 23) >>> 31; // 20 + out[currentPos++] = (valn << 24) >>> 31; + out[currentPos++] = (valn << 25) >>> 31; + out[currentPos++] = (valn << 26) >>> 31; + out[currentPos++] = (valn << 27) >>> 31; + out[currentPos++] = (valn << 28) >>> 31; + out[currentPos++] = (valn << 29) >>> 31; + out[currentPos++] = (valn << 30) >>> 31; + out[currentPos++] = (valn << 31) >>> 31; + } + + private void decode53(int val, int valn, int[] out, int currentPos) { + // number : 4, bitwidth : 7 + out[currentPos++] = (val << 8) >>> 25; + out[currentPos++] = (val << 15) >>> 25; + out[currentPos++] = (val << 22) >>> 25; + out[currentPos++] = (val << 29) >>> 25 | (valn >>> 28); + // number : 1, bitwidth : 28 + out[currentPos++] = (valn << 4) >>> 4; + } + + private void decode52(int val, int valn, int[] out, int currentPos) { + // number : 4, bitwidth : 7 + out[currentPos++] = (val << 8) >>> 25; + out[currentPos++] = (val << 15) >>> 25; + out[currentPos++] = (val << 22) >>> 25; + out[currentPos++] = (val << 29) >>> 25 | (valn >>> 28); + // number : 2, bitwidth : 14 + out[currentPos++] = (valn << 4) >>> 18; + out[currentPos++] = (valn << 18) >>> 18; + } + + private void decode51(int val, int valn, int[] out, int currentPos) { + // number : 4, bitwidth : 7 + out[currentPos++] = (val << 8) >>> 25; + out[currentPos++] = (val << 15) >>> 25; + out[currentPos++] = (val << 22) >>> 25; + out[currentPos++] = (val << 29) >>> 25 | (valn >>> 27); + // number : 3, bitwidth : 9 + out[currentPos++] = (valn << 5) >>> 23; + out[currentPos++] = (valn << 14) >>> 23; + out[currentPos++] = (valn << 23) >>> 23; + } + + private void decode50(int val, int valn, int[] out, int 
currentPos) { + // number : 4, bitwidth : 7 + out[currentPos++] = (val << 8) >>> 25; + out[currentPos++] = (val << 15) >>> 25; + out[currentPos++] = (val << 22) >>> 25; + out[currentPos++] = (val << 29) >>> 25 | (valn >>> 28); + // number : 4, bitwidth : 7 + out[currentPos++] = (valn << 4) >>> 25; + out[currentPos++] = (valn << 11) >>> 25; + out[currentPos++] = (valn << 18) >>> 25; + out[currentPos++] = (valn << 25) >>> 25; + } + + private void decode49(int val, int valn, int[] out, int currentPos) { + // number : 4, bitwidth : 7 + out[currentPos++] = (val << 8) >>> 25; + out[currentPos++] = (val << 15) >>> 25; + out[currentPos++] = (val << 22) >>> 25; + out[currentPos++] = (val << 29) >>> 25 | (valn >>> 25); + // number : 5, bitwidth : 5 + out[currentPos++] = (valn << 7) >>> 27; + out[currentPos++] = (valn << 12) >>> 27; + out[currentPos++] = (valn << 17) >>> 27; + out[currentPos++] = (valn << 22) >>> 27; + out[currentPos++] = (valn << 27) >>> 27; + } + + private void decode48(int val, int valn, int[] out, int currentPos) { + // number : 4, bitwidth : 7 + out[currentPos++] = (val << 8) >>> 25; + out[currentPos++] = (val << 15) >>> 25; + out[currentPos++] = (val << 22) >>> 25; + out[currentPos++] = (val << 29) >>> 25 | (valn >>> 28); + // number : 7, bitwidth : 4 + out[currentPos++] = (valn << 4) >>> 28; + out[currentPos++] = (valn << 8) >>> 28; + out[currentPos++] = (valn << 12) >>> 28; + out[currentPos++] = (valn << 16) >>> 28; + out[currentPos++] = (valn << 20) >>> 28; + out[currentPos++] = (valn << 24) >>> 28; + out[currentPos++] = (valn << 28) >>> 28; + } + + private void decode47(int val, int valn, int[] out, int currentPos) { + // number : 4, bitwidth : 7 + out[currentPos++] = (val << 8) >>> 25; + out[currentPos++] = (val << 15) >>> 25; + out[currentPos++] = (val << 22) >>> 25; + out[currentPos++] = (val << 29) >>> 25 | (valn >>> 27); + // number : 9, bitwidth : 3 + out[currentPos++] = (valn << 5) >>> 29; + out[currentPos++] = (valn << 8) >>> 29; + 
out[currentPos++] = (valn << 11) >>> 29; + out[currentPos++] = (valn << 14) >>> 29; + out[currentPos++] = (valn << 17) >>> 29; + out[currentPos++] = (valn << 20) >>> 29; + out[currentPos++] = (valn << 23) >>> 29; + out[currentPos++] = (valn << 26) >>> 29; + out[currentPos++] = (valn << 29) >>> 29; + } + + private void decode46(int val, int valn, int[] out, int currentPos) { + // number : 4, bitwidth : 7 + out[currentPos++] = (val << 8) >>> 25; + out[currentPos++] = (val << 15) >>> 25; + out[currentPos++] = (val << 22) >>> 25; + out[currentPos++] = (val << 29) >>> 25 | (valn >>> 28); + // number : 14, bitwidth : 2 + out[currentPos++] = (valn << 4) >>> 30; + out[currentPos++] = (valn << 6) >>> 30; + out[currentPos++] = (valn << 8) >>> 30; + out[currentPos++] = (valn << 10) >>> 30; + out[currentPos++] = (valn << 12) >>> 30; + out[currentPos++] = (valn << 14) >>> 30; + out[currentPos++] = (valn << 16) >>> 30; + out[currentPos++] = (valn << 18) >>> 30; + out[currentPos++] = (valn << 20) >>> 30; + out[currentPos++] = (valn << 22) >>> 30; // 10 + out[currentPos++] = (valn << 24) >>> 30; + out[currentPos++] = (valn << 26) >>> 30; + out[currentPos++] = (valn << 28) >>> 30; + out[currentPos++] = (valn << 30) >>> 30; + } + + private void decode45(int val, int valn, int[] out, int currentPos) { + // number : 4, bitwidth : 7 + out[currentPos++] = (val << 8) >>> 25; + out[currentPos++] = (val << 15) >>> 25; + out[currentPos++] = (val << 22) >>> 25; + out[currentPos++] = (val << 29) >>> 25 | (valn >>> 28); + // number : 28, bitwidth : 1 + out[currentPos++] = (valn << 4) >>> 31; + out[currentPos++] = (valn << 5) >>> 31; + out[currentPos++] = (valn << 6) >>> 31; + out[currentPos++] = (valn << 7) >>> 31; + out[currentPos++] = (valn << 8) >>> 31; + out[currentPos++] = (valn << 9) >>> 31; + out[currentPos++] = (valn << 10) >>> 31; + out[currentPos++] = (valn << 11) >>> 31; + out[currentPos++] = (valn << 12) >>> 31; + out[currentPos++] = (valn << 13) >>> 31; // 10 + out[currentPos++] = 
(valn << 14) >>> 31; + out[currentPos++] = (valn << 15) >>> 31; + out[currentPos++] = (valn << 16) >>> 31; + out[currentPos++] = (valn << 17) >>> 31; + out[currentPos++] = (valn << 18) >>> 31; + out[currentPos++] = (valn << 19) >>> 31; + out[currentPos++] = (valn << 20) >>> 31; + out[currentPos++] = (valn << 21) >>> 31; + out[currentPos++] = (valn << 22) >>> 31; + out[currentPos++] = (valn << 23) >>> 31; // 20 + out[currentPos++] = (valn << 24) >>> 31; + out[currentPos++] = (valn << 25) >>> 31; + out[currentPos++] = (valn << 26) >>> 31; + out[currentPos++] = (valn << 27) >>> 31; + out[currentPos++] = (valn << 28) >>> 31; + out[currentPos++] = (valn << 29) >>> 31; + out[currentPos++] = (valn << 30) >>> 31; + out[currentPos++] = (valn << 31) >>> 31; + } + + private void decode44(int val, int valn, int[] out, int currentPos) { + // number : 5, bitwidth : 5 + out[currentPos++] = (val << 8) >>> 27; + out[currentPos++] = (val << 13) >>> 27; + out[currentPos++] = (val << 18) >>> 27; + out[currentPos++] = (val << 23) >>> 27; + out[currentPos++] = (val << 28) >>> 27 | (valn >>> 28); + // number : 1, bitwidth : 28 + out[currentPos++] = (valn << 4) >>> 4; + } + + private void decode43(int val, int valn, int[] out, int currentPos) { + // number : 5, bitwidth : 5 + out[currentPos++] = (val << 8) >>> 27; + out[currentPos++] = (val << 13) >>> 27; + out[currentPos++] = (val << 18) >>> 27; + out[currentPos++] = (val << 23) >>> 27; + out[currentPos++] = (val << 28) >>> 27 | (valn >>> 28); + // number : 2, bitwidth : 14 + out[currentPos++] = (valn << 4) >>> 18; + out[currentPos++] = (valn << 18) >>> 18; + } + + private void decode42(int val, int valn, int[] out, int currentPos) { + // number : 5, bitwidth : 5 + out[currentPos++] = (val << 8) >>> 27; + out[currentPos++] = (val << 13) >>> 27; + out[currentPos++] = (val << 18) >>> 27; + out[currentPos++] = (val << 23) >>> 27; + out[currentPos++] = (val << 28) >>> 27 | (valn >>> 27); + // number : 3, bitwidth : 9 + out[currentPos++] = 
(valn << 5) >>> 23; + out[currentPos++] = (valn << 14) >>> 23; + out[currentPos++] = (valn << 23) >>> 23; + } + + private void decode41(int val, int valn, int[] out, int currentPos) { + // number : 5, bitwidth : 5 + out[currentPos++] = (val << 8) >>> 27; + out[currentPos++] = (val << 13) >>> 27; + out[currentPos++] = (val << 18) >>> 27; + out[currentPos++] = (val << 23) >>> 27; + out[currentPos++] = (val << 28) >>> 27 | (valn >>> 28); + // number : 4, bitwidth : 7 + out[currentPos++] = (valn << 4) >>> 25; + out[currentPos++] = (valn << 11) >>> 25; + out[currentPos++] = (valn << 18) >>> 25; + out[currentPos++] = (valn << 25) >>> 25; + } + + private void decode40(int val, int valn, int[] out, int currentPos) { + // number : 5, bitwidth : 5 + out[currentPos++] = (val << 8) >>> 27; + out[currentPos++] = (val << 13) >>> 27; + out[currentPos++] = (val << 18) >>> 27; + out[currentPos++] = (val << 23) >>> 27; + out[currentPos++] = (val << 28) >>> 27 | (valn >>> 25); + // number : 5, bitwidth : 5 + out[currentPos++] = (valn << 7) >>> 27; + out[currentPos++] = (valn << 12) >>> 27; + out[currentPos++] = (valn << 17) >>> 27; + out[currentPos++] = (valn << 22) >>> 27; + out[currentPos++] = (valn << 27) >>> 27; + } + + private void decode39(int val, int valn, int[] out, int currentPos) { + // number : 5, bitwidth : 5 + out[currentPos++] = (val << 8) >>> 27; + out[currentPos++] = (val << 13) >>> 27; + out[currentPos++] = (val << 18) >>> 27; + out[currentPos++] = (val << 23) >>> 27; + out[currentPos++] = (val << 28) >>> 27 | (valn >>> 28); + // number : 7, bitwidth : 4 + out[currentPos++] = (valn << 4) >>> 28; + out[currentPos++] = (valn << 8) >>> 28; + out[currentPos++] = (valn << 12) >>> 28; + out[currentPos++] = (valn << 16) >>> 28; + out[currentPos++] = (valn << 20) >>> 28; + out[currentPos++] = (valn << 24) >>> 28; + out[currentPos++] = (valn << 28) >>> 28; + } + + private void decode38(int val, int valn, int[] out, int currentPos) { + // number : 5, bitwidth : 5 + 
out[currentPos++] = (val << 8) >>> 27; + out[currentPos++] = (val << 13) >>> 27; + out[currentPos++] = (val << 18) >>> 27; + out[currentPos++] = (val << 23) >>> 27; + out[currentPos++] = (val << 28) >>> 27 | (valn >>> 27); + // number : 9, bitwidth : 3 + out[currentPos++] = (valn << 5) >>> 29; + out[currentPos++] = (valn << 8) >>> 29; + out[currentPos++] = (valn << 11) >>> 29; + out[currentPos++] = (valn << 14) >>> 29; + out[currentPos++] = (valn << 17) >>> 29; + out[currentPos++] = (valn << 20) >>> 29; + out[currentPos++] = (valn << 23) >>> 29; + out[currentPos++] = (valn << 26) >>> 29; + out[currentPos++] = (valn << 29) >>> 29; + } + + private void decode37(int val, int valn, int[] out, int currentPos) { + // number : 5, bitwidth : 5 + out[currentPos++] = (val << 8) >>> 27; + out[currentPos++] = (val << 13) >>> 27; + out[currentPos++] = (val << 18) >>> 27; + out[currentPos++] = (val << 23) >>> 27; + out[currentPos++] = (val << 28) >>> 27 | (valn >>> 28); + // number : 14, bitwidth : 2 + out[currentPos++] = (valn << 4) >>> 30; + out[currentPos++] = (valn << 6) >>> 30; + out[currentPos++] = (valn << 8) >>> 30; + out[currentPos++] = (valn << 10) >>> 30; + out[currentPos++] = (valn << 12) >>> 30; + out[currentPos++] = (valn << 14) >>> 30; + out[currentPos++] = (valn << 16) >>> 30; + out[currentPos++] = (valn << 18) >>> 30; + out[currentPos++] = (valn << 20) >>> 30; + out[currentPos++] = (valn << 22) >>> 30; // 10 + out[currentPos++] = (valn << 24) >>> 30; + out[currentPos++] = (valn << 26) >>> 30; + out[currentPos++] = (valn << 28) >>> 30; + out[currentPos++] = (valn << 30) >>> 30; + } + + private void decode36(int val, int valn, int[] out, int currentPos) { + // number : 5, bitwidth : 5 + out[currentPos++] = (val << 8) >>> 27; + out[currentPos++] = (val << 13) >>> 27; + out[currentPos++] = (val << 18) >>> 27; + out[currentPos++] = (val << 23) >>> 27; + out[currentPos++] = (val << 28) >>> 27 | (valn >>> 28); + // number : 28, bitwidth : 1 + out[currentPos++] = (valn 
<< 4) >>> 31; + out[currentPos++] = (valn << 5) >>> 31; + out[currentPos++] = (valn << 6) >>> 31; + out[currentPos++] = (valn << 7) >>> 31; + out[currentPos++] = (valn << 8) >>> 31; + out[currentPos++] = (valn << 9) >>> 31; + out[currentPos++] = (valn << 10) >>> 31; + out[currentPos++] = (valn << 11) >>> 31; + out[currentPos++] = (valn << 12) >>> 31; + out[currentPos++] = (valn << 13) >>> 31; // 10 + out[currentPos++] = (valn << 14) >>> 31; + out[currentPos++] = (valn << 15) >>> 31; + out[currentPos++] = (valn << 16) >>> 31; + out[currentPos++] = (valn << 17) >>> 31; + out[currentPos++] = (valn << 18) >>> 31; + out[currentPos++] = (valn << 19) >>> 31; + out[currentPos++] = (valn << 20) >>> 31; + out[currentPos++] = (valn << 21) >>> 31; + out[currentPos++] = (valn << 22) >>> 31; + out[currentPos++] = (valn << 23) >>> 31; // 20 + out[currentPos++] = (valn << 24) >>> 31; + out[currentPos++] = (valn << 25) >>> 31; + out[currentPos++] = (valn << 26) >>> 31; + out[currentPos++] = (valn << 27) >>> 31; + out[currentPos++] = (valn << 28) >>> 31; + out[currentPos++] = (valn << 29) >>> 31; + out[currentPos++] = (valn << 30) >>> 31; + out[currentPos++] = (valn << 31) >>> 31; + } + + private void decode35(int val, int valn, int[] out, int currentPos) { + // number : 7, bitwidth : 4 + out[currentPos++] = (val << 8) >>> 28; + out[currentPos++] = (val << 12) >>> 28; + out[currentPos++] = (val << 16) >>> 28; + out[currentPos++] = (val << 20) >>> 28; + out[currentPos++] = (val << 24) >>> 28; + out[currentPos++] = (val << 28) >>> 28; + out[currentPos++] = (valn << 0) >>> 28; + // number : 1, bitwidth : 28 + out[currentPos++] = (valn << 4) >>> 4; + } + + private void decode34(int val, int valn, int[] out, int currentPos) { + // number : 7, bitwidth : 4 + out[currentPos++] = (val << 8) >>> 28; + out[currentPos++] = (val << 12) >>> 28; + out[currentPos++] = (val << 16) >>> 28; + out[currentPos++] = (val << 20) >>> 28; + out[currentPos++] = (val << 24) >>> 28; + out[currentPos++] = (val 
<< 28) >>> 28; + out[currentPos++] = (valn << 0) >>> 28; + // number : 2, bitwidth : 14 + out[currentPos++] = (valn << 4) >>> 18; + out[currentPos++] = (valn << 18) >>> 18; + } + + private void decode33(int val, int valn, int[] out, int currentPos) { + // number : 7, bitwidth : 4 + out[currentPos++] = (val << 8) >>> 28; + out[currentPos++] = (val << 12) >>> 28; + out[currentPos++] = (val << 16) >>> 28; + out[currentPos++] = (val << 20) >>> 28; + out[currentPos++] = (val << 24) >>> 28; + out[currentPos++] = (val << 28) >>> 28; + out[currentPos++] = (valn << 1) >>> 28; + // number : 3, bitwidth : 9 + out[currentPos++] = (valn << 5) >>> 23; + out[currentPos++] = (valn << 14) >>> 23; + out[currentPos++] = (valn << 23) >>> 23; + } + + private void decode32(int val, int valn, int[] out, int currentPos) { + // number : 7, bitwidth : 4 + out[currentPos++] = (val << 8) >>> 28; + out[currentPos++] = (val << 12) >>> 28; + out[currentPos++] = (val << 16) >>> 28; + out[currentPos++] = (val << 20) >>> 28; + out[currentPos++] = (val << 24) >>> 28; + out[currentPos++] = (val << 28) >>> 28; + out[currentPos++] = (valn << 0) >>> 28; + // number : 4, bitwidth : 7 + out[currentPos++] = (valn << 4) >>> 25; + out[currentPos++] = (valn << 11) >>> 25; + out[currentPos++] = (valn << 18) >>> 25; + out[currentPos++] = (valn << 25) >>> 25; + } + + private void decode31(int val, int valn, int[] out, int currentPos) { + // number : 7, bitwidth : 4 + out[currentPos++] = (val << 8) >>> 28; + out[currentPos++] = (val << 12) >>> 28; + out[currentPos++] = (val << 16) >>> 28; + out[currentPos++] = (val << 20) >>> 28; + out[currentPos++] = (val << 24) >>> 28; + out[currentPos++] = (val << 28) >>> 28; + out[currentPos++] = (valn << 3) >>> 28; + // number : 5, bitwidth : 5 + out[currentPos++] = (valn << 7) >>> 27; + out[currentPos++] = (valn << 12) >>> 27; + out[currentPos++] = (valn << 17) >>> 27; + out[currentPos++] = (valn << 22) >>> 27; + out[currentPos++] = (valn << 27) >>> 27; + } + + private void 
decode30(int val, int valn, int[] out, int currentPos) { + // number : 7, bitwidth : 4 + out[currentPos++] = (val << 8) >>> 28; + out[currentPos++] = (val << 12) >>> 28; + out[currentPos++] = (val << 16) >>> 28; + out[currentPos++] = (val << 20) >>> 28; + out[currentPos++] = (val << 24) >>> 28; + out[currentPos++] = (val << 28) >>> 28; + out[currentPos++] = (valn << 0) >>> 28; + // number : 7, bitwidth : 4 + out[currentPos++] = (valn << 4) >>> 28; + out[currentPos++] = (valn << 8) >>> 28; + out[currentPos++] = (valn << 12) >>> 28; + out[currentPos++] = (valn << 16) >>> 28; + out[currentPos++] = (valn << 20) >>> 28; + out[currentPos++] = (valn << 24) >>> 28; + out[currentPos++] = (valn << 28) >>> 28; + } + + private void decode29(int val, int valn, int[] out, int currentPos) { + // number : 7, bitwidth : 4 + out[currentPos++] = (val << 8) >>> 28; + out[currentPos++] = (val << 12) >>> 28; + out[currentPos++] = (val << 16) >>> 28; + out[currentPos++] = (val << 20) >>> 28; + out[currentPos++] = (val << 24) >>> 28; + out[currentPos++] = (val << 28) >>> 28; + out[currentPos++] = (valn << 1) >>> 28; + // number : 9, bitwidth : 3 + out[currentPos++] = (valn << 5) >>> 29; + out[currentPos++] = (valn << 8) >>> 29; + out[currentPos++] = (valn << 11) >>> 29; + out[currentPos++] = (valn << 14) >>> 29; + out[currentPos++] = (valn << 17) >>> 29; + out[currentPos++] = (valn << 20) >>> 29; + out[currentPos++] = (valn << 23) >>> 29; + out[currentPos++] = (valn << 26) >>> 29; + out[currentPos++] = (valn << 29) >>> 29; + } + + private void decode28(int val, int valn, int[] out, int currentPos) { + // number : 7, bitwidth : 4 + out[currentPos++] = (val << 8) >>> 28; + out[currentPos++] = (val << 12) >>> 28; + out[currentPos++] = (val << 16) >>> 28; + out[currentPos++] = (val << 20) >>> 28; + out[currentPos++] = (val << 24) >>> 28; + out[currentPos++] = (val << 28) >>> 28; + out[currentPos++] = (valn << 0) >>> 28; + // number : 14, bitwidth : 2 + out[currentPos++] = (valn << 4) >>> 30; 
+ out[currentPos++] = (valn << 6) >>> 30; + out[currentPos++] = (valn << 8) >>> 30; + out[currentPos++] = (valn << 10) >>> 30; + out[currentPos++] = (valn << 12) >>> 30; + out[currentPos++] = (valn << 14) >>> 30; + out[currentPos++] = (valn << 16) >>> 30; + out[currentPos++] = (valn << 18) >>> 30; + out[currentPos++] = (valn << 20) >>> 30; + out[currentPos++] = (valn << 22) >>> 30; // 10 + out[currentPos++] = (valn << 24) >>> 30; + out[currentPos++] = (valn << 26) >>> 30; + out[currentPos++] = (valn << 28) >>> 30; + out[currentPos++] = (valn << 30) >>> 30; + } + + private void decode27(int val, int valn, int[] out, int currentPos) { + // number : 7, bitwidth : 4 + out[currentPos++] = (val << 8) >>> 28; + out[currentPos++] = (val << 12) >>> 28; + out[currentPos++] = (val << 16) >>> 28; + out[currentPos++] = (val << 20) >>> 28; + out[currentPos++] = (val << 24) >>> 28; + out[currentPos++] = (val << 28) >>> 28; + out[currentPos++] = (valn << 0) >>> 28; + // number : 28, bitwidth : 1 + out[currentPos++] = (valn << 4) >>> 31; + out[currentPos++] = (valn << 5) >>> 31; + out[currentPos++] = (valn << 6) >>> 31; + out[currentPos++] = (valn << 7) >>> 31; + out[currentPos++] = (valn << 8) >>> 31; + out[currentPos++] = (valn << 9) >>> 31; + out[currentPos++] = (valn << 10) >>> 31; + out[currentPos++] = (valn << 11) >>> 31; + out[currentPos++] = (valn << 12) >>> 31; + out[currentPos++] = (valn << 13) >>> 31; // 10 + out[currentPos++] = (valn << 14) >>> 31; + out[currentPos++] = (valn << 15) >>> 31; + out[currentPos++] = (valn << 16) >>> 31; + out[currentPos++] = (valn << 17) >>> 31; + out[currentPos++] = (valn << 18) >>> 31; + out[currentPos++] = (valn << 19) >>> 31; + out[currentPos++] = (valn << 20) >>> 31; + out[currentPos++] = (valn << 21) >>> 31; + out[currentPos++] = (valn << 22) >>> 31; + out[currentPos++] = (valn << 23) >>> 31; // 20 + out[currentPos++] = (valn << 24) >>> 31; + out[currentPos++] = (valn << 25) >>> 31; + out[currentPos++] = (valn << 26) >>> 31; + 
out[currentPos++] = (valn << 27) >>> 31; + out[currentPos++] = (valn << 28) >>> 31; + out[currentPos++] = (valn << 29) >>> 31; + out[currentPos++] = (valn << 30) >>> 31; + out[currentPos++] = (valn << 31) >>> 31; + } + + private void decode26(int val, int valn, int[] out, int currentPos) { + // number : 9, bitwidth : 3 + out[currentPos++] = (val << 8) >>> 29; + out[currentPos++] = (val << 11) >>> 29; + out[currentPos++] = (val << 14) >>> 29; + out[currentPos++] = (val << 17) >>> 29; + out[currentPos++] = (val << 20) >>> 29; + out[currentPos++] = (val << 23) >>> 29; + out[currentPos++] = (val << 26) >>> 29; + out[currentPos++] = (val << 29) >>> 29; + out[currentPos++] = (valn << 1) >>> 29; + // number : 1, bitwidth : 28 + out[currentPos++] = (valn << 4) >>> 4; + } + + private void decode25(int val, int valn, int[] out, int currentPos) { + // number : 9, bitwidth : 3 + out[currentPos++] = (val << 8) >>> 29; + out[currentPos++] = (val << 11) >>> 29; + out[currentPos++] = (val << 14) >>> 29; + out[currentPos++] = (val << 17) >>> 29; + out[currentPos++] = (val << 20) >>> 29; + out[currentPos++] = (val << 23) >>> 29; + out[currentPos++] = (val << 26) >>> 29; + out[currentPos++] = (val << 29) >>> 29; + out[currentPos++] = (valn << 1) >>> 29; + // number : 2, bitwidth : 14 + out[currentPos++] = (valn << 4) >>> 18; + out[currentPos++] = (valn << 18) >>> 18; + } + + private void decode24(int val, int valn, int[] out, int currentPos) { + // number : 9, bitwidth : 3 + out[currentPos++] = (val << 8) >>> 29; + out[currentPos++] = (val << 11) >>> 29; + out[currentPos++] = (val << 14) >>> 29; + out[currentPos++] = (val << 17) >>> 29; + out[currentPos++] = (val << 20) >>> 29; + out[currentPos++] = (val << 23) >>> 29; + out[currentPos++] = (val << 26) >>> 29; + out[currentPos++] = (val << 29) >>> 29; + out[currentPos++] = (valn << 2) >>> 29; + // number : 3, bitwidth : 9 + out[currentPos++] = (valn << 5) >>> 23; + out[currentPos++] = (valn << 14) >>> 23; + out[currentPos++] = (valn 
<< 23) >>> 23; + } + + private void decode23(int val, int valn, int[] out, int currentPos) { + // number : 9, bitwidth : 3 + out[currentPos++] = (val << 8) >>> 29; + out[currentPos++] = (val << 11) >>> 29; + out[currentPos++] = (val << 14) >>> 29; + out[currentPos++] = (val << 17) >>> 29; + out[currentPos++] = (val << 20) >>> 29; + out[currentPos++] = (val << 23) >>> 29; + out[currentPos++] = (val << 26) >>> 29; + out[currentPos++] = (val << 29) >>> 29; + out[currentPos++] = (valn << 1) >>> 29; + // number : 4, bitwidth : 7 + out[currentPos++] = (valn << 4) >>> 25; + out[currentPos++] = (valn << 11) >>> 25; + out[currentPos++] = (valn << 18) >>> 25; + out[currentPos++] = (valn << 25) >>> 25; + } + + private void decode22(int val, int valn, int[] out, int currentPos) { + // number : 9, bitwidth : 3 + out[currentPos++] = (val << 8) >>> 29; + out[currentPos++] = (val << 11) >>> 29; + out[currentPos++] = (val << 14) >>> 29; + out[currentPos++] = (val << 17) >>> 29; + out[currentPos++] = (val << 20) >>> 29; + out[currentPos++] = (val << 23) >>> 29; + out[currentPos++] = (val << 26) >>> 29; + out[currentPos++] = (val << 29) >>> 29; + out[currentPos++] = (valn << 4) >>> 29; + // number : 5, bitwidth : 5 + out[currentPos++] = (valn << 7) >>> 27; + out[currentPos++] = (valn << 12) >>> 27; + out[currentPos++] = (valn << 17) >>> 27; + out[currentPos++] = (valn << 22) >>> 27; + out[currentPos++] = (valn << 27) >>> 27; + } + + private void decode21(int val, int valn, int[] out, int currentPos) { + // number : 9, bitwidth : 3 + out[currentPos++] = (val << 8) >>> 29; + out[currentPos++] = (val << 11) >>> 29; + out[currentPos++] = (val << 14) >>> 29; + out[currentPos++] = (val << 17) >>> 29; + out[currentPos++] = (val << 20) >>> 29; + out[currentPos++] = (val << 23) >>> 29; + out[currentPos++] = (val << 26) >>> 29; + out[currentPos++] = (val << 29) >>> 29; + out[currentPos++] = (valn << 1) >>> 29; + // number : 7, bitwidth : 4 + out[currentPos++] = (valn << 4) >>> 28; + 
out[currentPos++] = (valn << 8) >>> 28; + out[currentPos++] = (valn << 12) >>> 28; + out[currentPos++] = (valn << 16) >>> 28; + out[currentPos++] = (valn << 20) >>> 28; + out[currentPos++] = (valn << 24) >>> 28; + out[currentPos++] = (valn << 28) >>> 28; + } + + private void decode20(int val, int valn, int[] out, int currentPos) { + // number : 9, bitwidth : 3 + out[currentPos++] = (val << 8) >>> 29; + out[currentPos++] = (val << 11) >>> 29; + out[currentPos++] = (val << 14) >>> 29; + out[currentPos++] = (val << 17) >>> 29; + out[currentPos++] = (val << 20) >>> 29; + out[currentPos++] = (val << 23) >>> 29; + out[currentPos++] = (val << 26) >>> 29; + out[currentPos++] = (val << 29) >>> 29; + out[currentPos++] = (valn << 2) >>> 29; + // number : 9, bitwidth : 3 + out[currentPos++] = (valn << 5) >>> 29; + out[currentPos++] = (valn << 8) >>> 29; + out[currentPos++] = (valn << 11) >>> 29; + out[currentPos++] = (valn << 14) >>> 29; + out[currentPos++] = (valn << 17) >>> 29; + out[currentPos++] = (valn << 20) >>> 29; + out[currentPos++] = (valn << 23) >>> 29; + out[currentPos++] = (valn << 26) >>> 29; + out[currentPos++] = (valn << 29) >>> 29; + } + + private void decode19(int val, int valn, int[] out, int currentPos) { + // number : 9, bitwidth : 3 + out[currentPos++] = (val << 8) >>> 29; + out[currentPos++] = (val << 11) >>> 29; + out[currentPos++] = (val << 14) >>> 29; + out[currentPos++] = (val << 17) >>> 29; + out[currentPos++] = (val << 20) >>> 29; + out[currentPos++] = (val << 23) >>> 29; + out[currentPos++] = (val << 26) >>> 29; + out[currentPos++] = (val << 29) >>> 29; + out[currentPos++] = (valn << 1) >>> 29; + // number : 14, bitwidth : 2 + out[currentPos++] = (valn << 4) >>> 30; + out[currentPos++] = (valn << 6) >>> 30; + out[currentPos++] = (valn << 8) >>> 30; + out[currentPos++] = (valn << 10) >>> 30; + out[currentPos++] = (valn << 12) >>> 30; + out[currentPos++] = (valn << 14) >>> 30; + out[currentPos++] = (valn << 16) >>> 30; + out[currentPos++] = (valn << 
18) >>> 30; + out[currentPos++] = (valn << 20) >>> 30; + out[currentPos++] = (valn << 22) >>> 30; // 10 + out[currentPos++] = (valn << 24) >>> 30; + out[currentPos++] = (valn << 26) >>> 30; + out[currentPos++] = (valn << 28) >>> 30; + out[currentPos++] = (valn << 30) >>> 30; + } + + private void decode18(int val, int valn, int[] out, int currentPos) { + // number : 9, bitwidth : 3 + out[currentPos++] = (val << 8) >>> 29; + out[currentPos++] = (val << 11) >>> 29; + out[currentPos++] = (val << 14) >>> 29; + out[currentPos++] = (val << 17) >>> 29; + out[currentPos++] = (val << 20) >>> 29; + out[currentPos++] = (val << 23) >>> 29; + out[currentPos++] = (val << 26) >>> 29; + out[currentPos++] = (val << 29) >>> 29; + out[currentPos++] = (valn << 1) >>> 29; + // number : 28, bitwidth : 1 + out[currentPos++] = (valn << 4) >>> 31; + out[currentPos++] = (valn << 5) >>> 31; + out[currentPos++] = (valn << 6) >>> 31; + out[currentPos++] = (valn << 7) >>> 31; + out[currentPos++] = (valn << 8) >>> 31; + out[currentPos++] = (valn << 9) >>> 31; + out[currentPos++] = (valn << 10) >>> 31; + out[currentPos++] = (valn << 11) >>> 31; + out[currentPos++] = (valn << 12) >>> 31; + out[currentPos++] = (valn << 13) >>> 31; // 10 + out[currentPos++] = (valn << 14) >>> 31; + out[currentPos++] = (valn << 15) >>> 31; + out[currentPos++] = (valn << 16) >>> 31; + out[currentPos++] = (valn << 17) >>> 31; + out[currentPos++] = (valn << 18) >>> 31; + out[currentPos++] = (valn << 19) >>> 31; + out[currentPos++] = (valn << 20) >>> 31; + out[currentPos++] = (valn << 21) >>> 31; + out[currentPos++] = (valn << 22) >>> 31; + out[currentPos++] = (valn << 23) >>> 31; // 20 + out[currentPos++] = (valn << 24) >>> 31; + out[currentPos++] = (valn << 25) >>> 31; + out[currentPos++] = (valn << 26) >>> 31; + out[currentPos++] = (valn << 27) >>> 31; + out[currentPos++] = (valn << 28) >>> 31; + out[currentPos++] = (valn << 29) >>> 31; + out[currentPos++] = (valn << 30) >>> 31; + out[currentPos++] = (valn << 31) >>> 
31; + } + + private void decode17(int val, int valn, int[] out, int currentPos) { + // number : 14, bitwidth : 2 + out[currentPos++] = (val << 8) >>> 30; + out[currentPos++] = (val << 10) >>> 30; + out[currentPos++] = (val << 12) >>> 30; + out[currentPos++] = (val << 14) >>> 30; + out[currentPos++] = (val << 16) >>> 30; + out[currentPos++] = (val << 18) >>> 30; + out[currentPos++] = (val << 20) >>> 30; + out[currentPos++] = (val << 22) >>> 30; // 10 + out[currentPos++] = (val << 24) >>> 30; + out[currentPos++] = (val << 26) >>> 30; + out[currentPos++] = (val << 28) >>> 30; + out[currentPos++] = (val << 30) >>> 30; + out[currentPos++] = (valn << 0) >>> 30; + out[currentPos++] = (valn << 2) >>> 30; + // number : 1, bitwidth : 28 + out[currentPos++] = (valn << 4) >>> 4; + } + + private void decode16(int val, int valn, int[] out, int currentPos) { + // number : 14, bitwidth : 2 + out[currentPos++] = (val << 8) >>> 30; + out[currentPos++] = (val << 10) >>> 30; + out[currentPos++] = (val << 12) >>> 30; + out[currentPos++] = (val << 14) >>> 30; + out[currentPos++] = (val << 16) >>> 30; + out[currentPos++] = (val << 18) >>> 30; + out[currentPos++] = (val << 20) >>> 30; + out[currentPos++] = (val << 22) >>> 30; // 10 + out[currentPos++] = (val << 24) >>> 30; + out[currentPos++] = (val << 26) >>> 30; + out[currentPos++] = (val << 28) >>> 30; + out[currentPos++] = (val << 30) >>> 30; + out[currentPos++] = (valn << 0) >>> 30; + out[currentPos++] = (valn << 2) >>> 30; + // number : 2, bitwidth : 14 + out[currentPos++] = (valn << 4) >>> 18; + out[currentPos++] = (valn << 18) >>> 18; + } + + private void decode15(int val, int valn, int[] out, int currentPos) { + // number : 14, bitwidth : 2 + out[currentPos++] = (val << 8) >>> 30; + out[currentPos++] = (val << 10) >>> 30; + out[currentPos++] = (val << 12) >>> 30; + out[currentPos++] = (val << 14) >>> 30; + out[currentPos++] = (val << 16) >>> 30; + out[currentPos++] = (val << 18) >>> 30; + out[currentPos++] = (val << 20) >>> 30; + 
out[currentPos++] = (val << 22) >>> 30; // 10 + out[currentPos++] = (val << 24) >>> 30; + out[currentPos++] = (val << 26) >>> 30; + out[currentPos++] = (val << 28) >>> 30; + out[currentPos++] = (val << 30) >>> 30; + out[currentPos++] = (valn << 1) >>> 30; + out[currentPos++] = (valn << 3) >>> 30; + // number : 3, bitwidth : 9 + out[currentPos++] = (valn << 5) >>> 23; + out[currentPos++] = (valn << 14) >>> 23; + out[currentPos++] = (valn << 23) >>> 23; + } + + private void decode14(int val, int valn, int[] out, int currentPos) { + // number : 14, bitwidth : 2 + out[currentPos++] = (val << 8) >>> 30; + out[currentPos++] = (val << 10) >>> 30; + out[currentPos++] = (val << 12) >>> 30; + out[currentPos++] = (val << 14) >>> 30; + out[currentPos++] = (val << 16) >>> 30; + out[currentPos++] = (val << 18) >>> 30; + out[currentPos++] = (val << 20) >>> 30; + out[currentPos++] = (val << 22) >>> 30; // 10 + out[currentPos++] = (val << 24) >>> 30; + out[currentPos++] = (val << 26) >>> 30; + out[currentPos++] = (val << 28) >>> 30; + out[currentPos++] = (val << 30) >>> 30; + out[currentPos++] = (valn << 0) >>> 30; + out[currentPos++] = (valn << 2) >>> 30; + // number : 4, bitwidth : 7 + out[currentPos++] = (valn << 4) >>> 25; + out[currentPos++] = (valn << 11) >>> 25; + out[currentPos++] = (valn << 18) >>> 25; + out[currentPos++] = (valn << 25) >>> 25; + } + + private void decode13(int val, int valn, int[] out, int currentPos) { + // number : 14, bitwidth : 2 + out[currentPos++] = (val << 8) >>> 30; + out[currentPos++] = (val << 10) >>> 30; + out[currentPos++] = (val << 12) >>> 30; + out[currentPos++] = (val << 14) >>> 30; + out[currentPos++] = (val << 16) >>> 30; + out[currentPos++] = (val << 18) >>> 30; + out[currentPos++] = (val << 20) >>> 30; + out[currentPos++] = (val << 22) >>> 30; // 10 + out[currentPos++] = (val << 24) >>> 30; + out[currentPos++] = (val << 26) >>> 30; + out[currentPos++] = (val << 28) >>> 30; + out[currentPos++] = (val << 30) >>> 30; + out[currentPos++] = 
(valn << 3) >>> 30; + out[currentPos++] = (valn << 5) >>> 30; + // number : 5, bitwidth : 5 + out[currentPos++] = (valn << 7) >>> 27; + out[currentPos++] = (valn << 12) >>> 27; + out[currentPos++] = (valn << 17) >>> 27; + out[currentPos++] = (valn << 22) >>> 27; + out[currentPos++] = (valn << 27) >>> 27; + + } + + private void decode12(int val, int valn, int[] out, int currentPos) { + // number : 14, bitwidth : 2 + out[currentPos++] = (val << 8) >>> 30; + out[currentPos++] = (val << 10) >>> 30; + out[currentPos++] = (val << 12) >>> 30; + out[currentPos++] = (val << 14) >>> 30; + out[currentPos++] = (val << 16) >>> 30; + out[currentPos++] = (val << 18) >>> 30; + out[currentPos++] = (val << 20) >>> 30; + out[currentPos++] = (val << 22) >>> 30; // 10 + out[currentPos++] = (val << 24) >>> 30; + out[currentPos++] = (val << 26) >>> 30; + out[currentPos++] = (val << 28) >>> 30; + out[currentPos++] = (val << 30) >>> 30; + out[currentPos++] = (valn << 0) >>> 30; + out[currentPos++] = (valn << 2) >>> 30; + // number : 7, bitwidth : 4 + out[currentPos++] = (valn << 4) >>> 28; + out[currentPos++] = (valn << 8) >>> 28; + out[currentPos++] = (valn << 12) >>> 28; + out[currentPos++] = (valn << 16) >>> 28; + out[currentPos++] = (valn << 20) >>> 28; + out[currentPos++] = (valn << 24) >>> 28; + out[currentPos++] = (valn << 28) >>> 28; + + } + + private void decode11(int val, int valn, int[] out, int currentPos) { + // number : 14, bitwidth : 2 + out[currentPos++] = (val << 8) >>> 30; + out[currentPos++] = (val << 10) >>> 30; + out[currentPos++] = (val << 12) >>> 30; + out[currentPos++] = (val << 14) >>> 30; + out[currentPos++] = (val << 16) >>> 30; + out[currentPos++] = (val << 18) >>> 30; + out[currentPos++] = (val << 20) >>> 30; + out[currentPos++] = (val << 22) >>> 30; // 10 + out[currentPos++] = (val << 24) >>> 30; + out[currentPos++] = (val << 26) >>> 30; + out[currentPos++] = (val << 28) >>> 30; + out[currentPos++] = (val << 30) >>> 30; + out[currentPos++] = (valn << 1) >>> 
30; + out[currentPos++] = (valn << 3) >>> 30; + // number : 9, bitwidth : 3 + out[currentPos++] = (valn << 5) >>> 29; + out[currentPos++] = (valn << 8) >>> 29; + out[currentPos++] = (valn << 11) >>> 29; + out[currentPos++] = (valn << 14) >>> 29; + out[currentPos++] = (valn << 17) >>> 29; + out[currentPos++] = (valn << 20) >>> 29; + out[currentPos++] = (valn << 23) >>> 29; + out[currentPos++] = (valn << 26) >>> 29; + out[currentPos++] = (valn << 29) >>> 29; + + } + + private void decode10(int val, int valn, int[] out, int currentPos) { + // number : 14, bitwidth : 2 + out[currentPos++] = (val << 8) >>> 30; + out[currentPos++] = (val << 10) >>> 30; + out[currentPos++] = (val << 12) >>> 30; + out[currentPos++] = (val << 14) >>> 30; + out[currentPos++] = (val << 16) >>> 30; + out[currentPos++] = (val << 18) >>> 30; + out[currentPos++] = (val << 20) >>> 30; + out[currentPos++] = (val << 22) >>> 30; // 10 + out[currentPos++] = (val << 24) >>> 30; + out[currentPos++] = (val << 26) >>> 30; + out[currentPos++] = (val << 28) >>> 30; + out[currentPos++] = (val << 30) >>> 30; + out[currentPos++] = (valn << 0) >>> 30; + out[currentPos++] = (valn << 2) >>> 30; + // number : 14, bitwidth : 2 + out[currentPos++] = (valn << 4) >>> 30; + out[currentPos++] = (valn << 6) >>> 30; + out[currentPos++] = (valn << 8) >>> 30; + out[currentPos++] = (valn << 10) >>> 30; + out[currentPos++] = (valn << 12) >>> 30; + out[currentPos++] = (valn << 14) >>> 30; + out[currentPos++] = (valn << 16) >>> 30; + out[currentPos++] = (valn << 18) >>> 30; + out[currentPos++] = (valn << 20) >>> 30; + out[currentPos++] = (valn << 22) >>> 30; // 10 + out[currentPos++] = (valn << 24) >>> 30; + out[currentPos++] = (valn << 26) >>> 30; + out[currentPos++] = (valn << 28) >>> 30; + out[currentPos++] = (valn << 30) >>> 30; + } + + private void decode9(int val, int valn, int[] out, int currentPos) { + // number : 14, bitwidth : 2 + out[currentPos++] = (val << 8) >>> 30; + out[currentPos++] = (val << 10) >>> 30; + 
out[currentPos++] = (val << 12) >>> 30; + out[currentPos++] = (val << 14) >>> 30; + out[currentPos++] = (val << 16) >>> 30; + out[currentPos++] = (val << 18) >>> 30; + out[currentPos++] = (val << 20) >>> 30; + out[currentPos++] = (val << 22) >>> 30; // 10 + out[currentPos++] = (val << 24) >>> 30; + out[currentPos++] = (val << 26) >>> 30; + out[currentPos++] = (val << 28) >>> 30; + out[currentPos++] = (val << 30) >>> 30; + out[currentPos++] = (valn << 0) >>> 30; + out[currentPos++] = (valn << 2) >>> 30; + // number : 28, bitwidth : 1 + out[currentPos++] = (valn << 4) >>> 31; + out[currentPos++] = (valn << 5) >>> 31; + out[currentPos++] = (valn << 6) >>> 31; + out[currentPos++] = (valn << 7) >>> 31; + out[currentPos++] = (valn << 8) >>> 31; + out[currentPos++] = (valn << 9) >>> 31; + out[currentPos++] = (valn << 10) >>> 31; + out[currentPos++] = (valn << 11) >>> 31; + out[currentPos++] = (valn << 12) >>> 31; + out[currentPos++] = (valn << 13) >>> 31; // 10 + out[currentPos++] = (valn << 14) >>> 31; + out[currentPos++] = (valn << 15) >>> 31; + out[currentPos++] = (valn << 16) >>> 31; + out[currentPos++] = (valn << 17) >>> 31; + out[currentPos++] = (valn << 18) >>> 31; + out[currentPos++] = (valn << 19) >>> 31; + out[currentPos++] = (valn << 20) >>> 31; + out[currentPos++] = (valn << 21) >>> 31; + out[currentPos++] = (valn << 22) >>> 31; + out[currentPos++] = (valn << 23) >>> 31; // 20 + out[currentPos++] = (valn << 24) >>> 31; + out[currentPos++] = (valn << 25) >>> 31; + out[currentPos++] = (valn << 26) >>> 31; + out[currentPos++] = (valn << 27) >>> 31; + out[currentPos++] = (valn << 28) >>> 31; + out[currentPos++] = (valn << 29) >>> 31; + out[currentPos++] = (valn << 30) >>> 31; + out[currentPos++] = (valn << 31) >>> 31; + } + + private void decode8(int val, int valn, int[] out, int currentPos) { + // number : 28, bitwidth : 1 + out[currentPos++] = (val << 8) >>> 31; + out[currentPos++] = (val << 9) >>> 31; + out[currentPos++] = (val << 10) >>> 31; + 
out[currentPos++] = (val << 11) >>> 31; + out[currentPos++] = (val << 12) >>> 31; + out[currentPos++] = (val << 13) >>> 31; // 10 + out[currentPos++] = (val << 14) >>> 31; + out[currentPos++] = (val << 15) >>> 31; + out[currentPos++] = (val << 16) >>> 31; + out[currentPos++] = (val << 17) >>> 31; + out[currentPos++] = (val << 18) >>> 31; + out[currentPos++] = (val << 19) >>> 31; + out[currentPos++] = (val << 20) >>> 31; + out[currentPos++] = (val << 21) >>> 31; + out[currentPos++] = (val << 22) >>> 31; + out[currentPos++] = (val << 23) >>> 31; // 20 + out[currentPos++] = (val << 24) >>> 31; + out[currentPos++] = (val << 25) >>> 31; + out[currentPos++] = (val << 26) >>> 31; + out[currentPos++] = (val << 27) >>> 31; + out[currentPos++] = (val << 28) >>> 31; + out[currentPos++] = (val << 29) >>> 31; + out[currentPos++] = (val << 30) >>> 31; + out[currentPos++] = (val << 31) >>> 31; + out[currentPos++] = valn >>> 31; + out[currentPos++] = (valn << 1) >>> 31; + out[currentPos++] = (valn << 2) >>> 31; + out[currentPos++] = (valn << 3) >>> 31; + // number : 1, bitwidth : 28 + out[currentPos++] = (valn << 4) >>> 4; + } + + private void decode7(int val, int valn, int[] out, int currentPos) { + // number : 28, bitwidth : 1 + out[currentPos++] = (val << 8) >>> 31; + out[currentPos++] = (val << 9) >>> 31; + out[currentPos++] = (val << 10) >>> 31; + out[currentPos++] = (val << 11) >>> 31; + out[currentPos++] = (val << 12) >>> 31; + out[currentPos++] = (val << 13) >>> 31; // 10 + out[currentPos++] = (val << 14) >>> 31; + out[currentPos++] = (val << 15) >>> 31; + out[currentPos++] = (val << 16) >>> 31; + out[currentPos++] = (val << 17) >>> 31; + out[currentPos++] = (val << 18) >>> 31; + out[currentPos++] = (val << 19) >>> 31; + out[currentPos++] = (val << 20) >>> 31; + out[currentPos++] = (val << 21) >>> 31; + out[currentPos++] = (val << 22) >>> 31; + out[currentPos++] = (val << 23) >>> 31; // 20 + out[currentPos++] = (val << 24) >>> 31; + out[currentPos++] = (val << 25) >>> 31; 
+ out[currentPos++] = (val << 26) >>> 31; + out[currentPos++] = (val << 27) >>> 31; + out[currentPos++] = (val << 28) >>> 31; + out[currentPos++] = (val << 29) >>> 31; + out[currentPos++] = (val << 30) >>> 31; + out[currentPos++] = (val << 31) >>> 31; + out[currentPos++] = valn >>> 31; + out[currentPos++] = (valn << 1) >>> 31; + out[currentPos++] = (valn << 2) >>> 31; + out[currentPos++] = (valn << 3) >>> 31; + // number : 2, bitwidth : 14 + out[currentPos++] = (valn << 4) >>> 18; + out[currentPos++] = (valn << 18) >>> 18; + } + + private void decode6(int val, int valn, int[] out, int currentPos) { + // number : 28, bitwidth : 1 + out[currentPos++] = (val << 8) >>> 31; + out[currentPos++] = (val << 9) >>> 31; + out[currentPos++] = (val << 10) >>> 31; + out[currentPos++] = (val << 11) >>> 31; + out[currentPos++] = (val << 12) >>> 31; + out[currentPos++] = (val << 13) >>> 31; // 10 + out[currentPos++] = (val << 14) >>> 31; + out[currentPos++] = (val << 15) >>> 31; + out[currentPos++] = (val << 16) >>> 31; + out[currentPos++] = (val << 17) >>> 31; + out[currentPos++] = (val << 18) >>> 31; + out[currentPos++] = (val << 19) >>> 31; + out[currentPos++] = (val << 20) >>> 31; + out[currentPos++] = (val << 21) >>> 31; + out[currentPos++] = (val << 22) >>> 31; + out[currentPos++] = (val << 23) >>> 31; // 20 + out[currentPos++] = (val << 24) >>> 31; + out[currentPos++] = (val << 25) >>> 31; + out[currentPos++] = (val << 26) >>> 31; + out[currentPos++] = (val << 27) >>> 31; + out[currentPos++] = (val << 28) >>> 31; + out[currentPos++] = (val << 29) >>> 31; + out[currentPos++] = (val << 30) >>> 31; + out[currentPos++] = (val << 31) >>> 31; + out[currentPos++] = (valn << 1) >>> 31; + out[currentPos++] = (valn << 2) >>> 31; + out[currentPos++] = (valn << 3) >>> 31; + out[currentPos++] = (valn << 4) >>> 31; + // number : 3, bitwidth : 9 + out[currentPos++] = (valn << 5) >>> 23; + out[currentPos++] = (valn << 14) >>> 23; + out[currentPos++] = (valn << 23) >>> 23; + } + + private 
void decode5(int val, int valn, int[] out, int currentPos) { + // number : 28, bitwidth : 1 + out[currentPos++] = (val << 8) >>> 31; + out[currentPos++] = (val << 9) >>> 31; + out[currentPos++] = (val << 10) >>> 31; + out[currentPos++] = (val << 11) >>> 31; + out[currentPos++] = (val << 12) >>> 31; + out[currentPos++] = (val << 13) >>> 31; // 10 + out[currentPos++] = (val << 14) >>> 31; + out[currentPos++] = (val << 15) >>> 31; + out[currentPos++] = (val << 16) >>> 31; + out[currentPos++] = (val << 17) >>> 31; + out[currentPos++] = (val << 18) >>> 31; + out[currentPos++] = (val << 19) >>> 31; + out[currentPos++] = (val << 20) >>> 31; + out[currentPos++] = (val << 21) >>> 31; + out[currentPos++] = (val << 22) >>> 31; + out[currentPos++] = (val << 23) >>> 31; // 20 + out[currentPos++] = (val << 24) >>> 31; + out[currentPos++] = (val << 25) >>> 31; + out[currentPos++] = (val << 26) >>> 31; + out[currentPos++] = (val << 27) >>> 31; + out[currentPos++] = (val << 28) >>> 31; + out[currentPos++] = (val << 29) >>> 31; + out[currentPos++] = (val << 30) >>> 31; + out[currentPos++] = (val << 31) >>> 31; + out[currentPos++] = valn >>> 31; + out[currentPos++] = (valn << 1) >>> 31; + out[currentPos++] = (valn << 2) >>> 31; + out[currentPos++] = (valn << 3) >>> 31; + // number : 4, bitwidth : 7 + out[currentPos++] = (valn << 4) >>> 25; + out[currentPos++] = (valn << 11) >>> 25; + out[currentPos++] = (valn << 18) >>> 25; + out[currentPos++] = (valn << 25) >>> 25; + } + + private void decode4(int val, int valn, int[] out, int currentPos) { + // number : 28, bitwidth : 1 + out[currentPos++] = (val << 8) >>> 31; + out[currentPos++] = (val << 9) >>> 31; + out[currentPos++] = (val << 10) >>> 31; + out[currentPos++] = (val << 11) >>> 31; + out[currentPos++] = (val << 12) >>> 31; + out[currentPos++] = (val << 13) >>> 31; // 10 + out[currentPos++] = (val << 14) >>> 31; + out[currentPos++] = (val << 15) >>> 31; + out[currentPos++] = (val << 16) >>> 31; + out[currentPos++] = (val << 17) >>> 
31; + out[currentPos++] = (val << 18) >>> 31; + out[currentPos++] = (val << 19) >>> 31; + out[currentPos++] = (val << 20) >>> 31; + out[currentPos++] = (val << 21) >>> 31; + out[currentPos++] = (val << 22) >>> 31; + out[currentPos++] = (val << 23) >>> 31; // 20 + out[currentPos++] = (val << 24) >>> 31; + out[currentPos++] = (val << 25) >>> 31; + out[currentPos++] = (val << 26) >>> 31; + out[currentPos++] = (val << 27) >>> 31; + out[currentPos++] = (val << 28) >>> 31; + out[currentPos++] = (val << 29) >>> 31; + out[currentPos++] = (val << 30) >>> 31; + out[currentPos++] = (val << 31) >>> 31; + out[currentPos++] = (valn << 3) >>> 31;// 头部3bit + out[currentPos++] = (valn << 4) >>> 31; + out[currentPos++] = (valn << 5) >>> 31; + out[currentPos++] = (valn << 6) >>> 31; + // number : 5, bitwidth : 5 + out[currentPos++] = (valn << 7) >>> 27; + out[currentPos++] = (valn << 12) >>> 27; + out[currentPos++] = (valn << 17) >>> 27; + out[currentPos++] = (valn << 22) >>> 27; + out[currentPos++] = (valn << 27) >>> 27; + } + + private void decode3(int val, int valn, int[] out, int currentPos) { + // number : 28, bitwidth : 1 + out[currentPos++] = (val << 8) >>> 31; + out[currentPos++] = (val << 9) >>> 31; + out[currentPos++] = (val << 10) >>> 31; + out[currentPos++] = (val << 11) >>> 31; + out[currentPos++] = (val << 12) >>> 31; + out[currentPos++] = (val << 13) >>> 31; // 10 + out[currentPos++] = (val << 14) >>> 31; + out[currentPos++] = (val << 15) >>> 31; + out[currentPos++] = (val << 16) >>> 31; + out[currentPos++] = (val << 17) >>> 31; + out[currentPos++] = (val << 18) >>> 31; + out[currentPos++] = (val << 19) >>> 31; + out[currentPos++] = (val << 20) >>> 31; + out[currentPos++] = (val << 21) >>> 31; + out[currentPos++] = (val << 22) >>> 31; + out[currentPos++] = (val << 23) >>> 31; // 20 + out[currentPos++] = (val << 24) >>> 31; + out[currentPos++] = (val << 25) >>> 31; + out[currentPos++] = (val << 26) >>> 31; + out[currentPos++] = (val << 27) >>> 31; + out[currentPos++] = 
(val << 28) >>> 31; + out[currentPos++] = (val << 29) >>> 31; + out[currentPos++] = (val << 30) >>> 31; + out[currentPos++] = (val << 31) >>> 31; + out[currentPos++] = valn >>> 31; + out[currentPos++] = (valn << 1) >>> 31; + out[currentPos++] = (valn << 2) >>> 31; + out[currentPos++] = (valn << 3) >>> 31; + // number : 7, bitwidth : 4 + out[currentPos++] = (valn << 4) >>> 28; + out[currentPos++] = (valn << 8) >>> 28; + out[currentPos++] = (valn << 12) >>> 28; + out[currentPos++] = (valn << 16) >>> 28; + out[currentPos++] = (valn << 20) >>> 28; + out[currentPos++] = (valn << 24) >>> 28; + out[currentPos++] = (valn << 28) >>> 28; + } + + private void decode2(int val, int valn, int[] out, int currentPos) { + // number : 28, bitwidth : 1 + out[currentPos++] = (val << 8) >>> 31; + out[currentPos++] = (val << 9) >>> 31; + out[currentPos++] = (val << 10) >>> 31; + out[currentPos++] = (val << 11) >>> 31; + out[currentPos++] = (val << 12) >>> 31; + out[currentPos++] = (val << 13) >>> 31; // 10 + out[currentPos++] = (val << 14) >>> 31; + out[currentPos++] = (val << 15) >>> 31; + out[currentPos++] = (val << 16) >>> 31; + out[currentPos++] = (val << 17) >>> 31; + out[currentPos++] = (val << 18) >>> 31; + out[currentPos++] = (val << 19) >>> 31; + out[currentPos++] = (val << 20) >>> 31; + out[currentPos++] = (val << 21) >>> 31; + out[currentPos++] = (val << 22) >>> 31; + out[currentPos++] = (val << 23) >>> 31; // 20 + out[currentPos++] = (val << 24) >>> 31; + out[currentPos++] = (val << 25) >>> 31; + out[currentPos++] = (val << 26) >>> 31; + out[currentPos++] = (val << 27) >>> 31; + out[currentPos++] = (val << 28) >>> 31; + out[currentPos++] = (val << 29) >>> 31; + out[currentPos++] = (val << 30) >>> 31; + out[currentPos++] = (val << 31) >>> 31; + out[currentPos++] = (valn << 1) >>> 31;// 头部1bit + out[currentPos++] = (valn << 2) >>> 31; + out[currentPos++] = (valn << 3) >>> 31; + out[currentPos++] = (valn << 4) >>> 31; + // number : 9, bitwidth : 3 + out[currentPos++] = (valn << 
5) >>> 29; + out[currentPos++] = (valn << 8) >>> 29; + out[currentPos++] = (valn << 11) >>> 29; + out[currentPos++] = (valn << 14) >>> 29; + out[currentPos++] = (valn << 17) >>> 29; + out[currentPos++] = (valn << 20) >>> 29; + out[currentPos++] = (valn << 23) >>> 29; + out[currentPos++] = (valn << 26) >>> 29; + out[currentPos++] = (valn << 29) >>> 29; + } + + private void decode1(int val, int valn, int[] out, int currentPos) { + // number : 28, bitwidth : 1 + out[currentPos++] = (val << 8) >>> 31; + out[currentPos++] = (val << 9) >>> 31; + out[currentPos++] = (val << 10) >>> 31; + out[currentPos++] = (val << 11) >>> 31; + out[currentPos++] = (val << 12) >>> 31; + out[currentPos++] = (val << 13) >>> 31; // 10 + out[currentPos++] = (val << 14) >>> 31; + out[currentPos++] = (val << 15) >>> 31; + out[currentPos++] = (val << 16) >>> 31; + out[currentPos++] = (val << 17) >>> 31; + out[currentPos++] = (val << 18) >>> 31; + out[currentPos++] = (val << 19) >>> 31; + out[currentPos++] = (val << 20) >>> 31; + out[currentPos++] = (val << 21) >>> 31; + out[currentPos++] = (val << 22) >>> 31; + out[currentPos++] = (val << 23) >>> 31;// 20 + out[currentPos++] = (val << 24) >>> 31; + out[currentPos++] = (val << 25) >>> 31; + out[currentPos++] = (val << 26) >>> 31; + out[currentPos++] = (val << 27) >>> 31; + out[currentPos++] = (val << 28) >>> 31; + out[currentPos++] = (val << 29) >>> 31; + out[currentPos++] = (val << 30) >>> 31; + out[currentPos++] = (val << 31) >>> 31; + out[currentPos++] = valn >>> 31; + out[currentPos++] = (valn << 1) >>> 31; + out[currentPos++] = (valn << 2) >>> 31; + out[currentPos++] = (valn << 3) >>> 31; + // number : 14, bitwidth : 2 + out[currentPos++] = (valn << 4) >>> 30; + out[currentPos++] = (valn << 6) >>> 30; + out[currentPos++] = (valn << 8) >>> 30; + out[currentPos++] = (valn << 10) >>> 30; + out[currentPos++] = (valn << 12) >>> 30; + out[currentPos++] = (valn << 14) >>> 30; + out[currentPos++] = (valn << 16) >>> 30; + out[currentPos++] = (valn << 
18) >>> 30; + out[currentPos++] = (valn << 20) >>> 30; + out[currentPos++] = (valn << 22) >>> 30; // 10 + out[currentPos++] = (valn << 24) >>> 30; + out[currentPos++] = (valn << 26) >>> 30; + out[currentPos++] = (valn << 28) >>> 30; + out[currentPos++] = (valn << 30) >>> 30; + } + + private void decode0(int val, int valn, int[] out, int currentPos) { + // number : 28, bitwidth : 1 + out[currentPos++] = (val << 8) >>> 31; + out[currentPos++] = (val << 9) >>> 31; + out[currentPos++] = (val << 10) >>> 31; + out[currentPos++] = (val << 11) >>> 31; + out[currentPos++] = (val << 12) >>> 31; + out[currentPos++] = (val << 13) >>> 31; // 10 + out[currentPos++] = (val << 14) >>> 31; + out[currentPos++] = (val << 15) >>> 31; + out[currentPos++] = (val << 16) >>> 31; + out[currentPos++] = (val << 17) >>> 31; + out[currentPos++] = (val << 18) >>> 31; + out[currentPos++] = (val << 19) >>> 31; + out[currentPos++] = (val << 20) >>> 31; + out[currentPos++] = (val << 21) >>> 31; + out[currentPos++] = (val << 22) >>> 31; + out[currentPos++] = (val << 23) >>> 31; // 20 + out[currentPos++] = (val << 24) >>> 31; + out[currentPos++] = (val << 25) >>> 31; + out[currentPos++] = (val << 26) >>> 31; + out[currentPos++] = (val << 27) >>> 31; + out[currentPos++] = (val << 28) >>> 31; + out[currentPos++] = (val << 29) >>> 31; + out[currentPos++] = (val << 30) >>> 31; + out[currentPos++] = (val << 31) >>> 31; + out[currentPos++] = valn >>> 31; + out[currentPos++] = (valn << 1) >>> 31; + out[currentPos++] = (valn << 2) >>> 31; + out[currentPos++] = (valn << 3) >>> 31; + // number : 28, bitwidth : 1 + out[currentPos++] = (valn << 4) >>> 31; + out[currentPos++] = (valn << 5) >>> 31; + out[currentPos++] = (valn << 6) >>> 31; + out[currentPos++] = (valn << 7) >>> 31; + out[currentPos++] = (valn << 8) >>> 31; + out[currentPos++] = (valn << 9) >>> 31; + out[currentPos++] = (valn << 10) >>> 31; + out[currentPos++] = (valn << 11) >>> 31; + out[currentPos++] = (valn << 12) >>> 31; + out[currentPos++] = 
(valn << 13) >>> 31; // 10 + out[currentPos++] = (valn << 14) >>> 31; + out[currentPos++] = (valn << 15) >>> 31; + out[currentPos++] = (valn << 16) >>> 31; + out[currentPos++] = (valn << 17) >>> 31; + out[currentPos++] = (valn << 18) >>> 31; + out[currentPos++] = (valn << 19) >>> 31; + out[currentPos++] = (valn << 20) >>> 31; + out[currentPos++] = (valn << 21) >>> 31; + out[currentPos++] = (valn << 22) >>> 31; + out[currentPos++] = (valn << 23) >>> 31; // 20 + out[currentPos++] = (valn << 24) >>> 31; + out[currentPos++] = (valn << 25) >>> 31; + out[currentPos++] = (valn << 26) >>> 31; + out[currentPos++] = (valn << 27) >>> 31; + out[currentPos++] = (valn << 28) >>> 31; + out[currentPos++] = (valn << 29) >>> 31; + out[currentPos++] = (valn << 30) >>> 31; + out[currentPos++] = (valn << 31) >>> 31; + } + + + private final static int bitLength[] = { 1, 2, 3, 4, 5, 7, 9, 14, 28 }; + + private final static int codeNum[] = { 28, 14, 9, 7, 5, 4, 3, 2, 1 }; + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + @Override + public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { + int tmpoutpos = outpos.get(); + int currentPos = inpos.get(); + int selector1 = 0; + int selector2 = 0; + final int finalin = currentPos + inlength; + while (currentPos < finalin - 28 * 2) { + int nextCurrentPos = currentPos; + mainloop1: for (selector1=0; selector1 <= 8; selector1++) { + int compressedNum = codeNum[selector1]; + //if (finalin <= nextCurrentPos + compressedNum - 1) + // compressedNum = finalin - nextCurrentPos; + int b = bitLength[selector1]; + int max = 1 << b; + int i = 0; + for (; i < compressedNum; i++) { + if (Util.smallerorequalthan(max, in[nextCurrentPos + i])) + continue mainloop1; + } + nextCurrentPos += compressedNum; + break; + } + mainloop2: for (selector2 = 0; selector2 <= 8; selector2++) { + int compressedNum = codeNum[selector2]; + //if (finalin <= nextCurrentPos + compressedNum - 1) 
+ // compressedNum = finalin - nextCurrentPos; + int b = bitLength[selector2]; + int max = 1 << b; + int i = 0; + for (; i < compressedNum; i++) { + if (Util.smallerorequalthan(max, in[nextCurrentPos + i])) + continue mainloop2; + } + nextCurrentPos += compressedNum; + break; + } + int code = M[selector1][selector2]; + out[tmpoutpos] = 0; + out[tmpoutpos + 1] = 0; + switch (code) { + case 0: + encode0(in, currentPos, code, out, tmpoutpos); + break; + case 1: + encode1(in, currentPos, code, out, tmpoutpos); + break; + case 2: + encode2(in, currentPos, code, out, tmpoutpos); + break; + case 3: + encode3(in, currentPos, code, out, tmpoutpos); + break; + case 4: + encode4(in, currentPos, code, out, tmpoutpos); + break; + case 5: + encode5(in, currentPos, code, out, tmpoutpos); + break; + case 6: + encode6(in, currentPos, code, out, tmpoutpos); + break; + case 7: + encode7(in, currentPos, code, out, tmpoutpos); + break; + case 8: + encode8(in, currentPos, code, out, tmpoutpos); + break; + case 9: + encode9(in, currentPos, code, out, tmpoutpos); + break; + case 10: + encode10(in, currentPos, code, out, tmpoutpos); + break; + case 11: + encode11(in, currentPos, code, out, tmpoutpos); + break; + case 12: + encode12(in, currentPos, code, out, tmpoutpos); + break; + case 13: + encode13(in, currentPos, code, out, tmpoutpos); + break; + case 14: + encode14(in, currentPos, code, out, tmpoutpos); + break; + case 15: + encode15(in, currentPos, code, out, tmpoutpos); + break; + case 16: + encode16(in, currentPos, code, out, tmpoutpos); + break; + case 17: + encode17(in, currentPos, code, out, tmpoutpos); + break; + case 18: + encode18(in, currentPos, code, out, tmpoutpos); + break; + case 19: + encode19(in, currentPos, code, out, tmpoutpos); + break; + case 20: + encode20(in, currentPos, code, out, tmpoutpos); + break; + case 21: + encode21(in, currentPos, code, out, tmpoutpos); + break; + case 22: + encode22(in, currentPos, code, out, tmpoutpos); + break; + case 23: + 
encode23(in, currentPos, code, out, tmpoutpos); + break; + case 24: + encode24(in, currentPos, code, out, tmpoutpos); + break; + case 25: + encode25(in, currentPos, code, out, tmpoutpos); + break; + case 26: + encode26(in, currentPos, code, out, tmpoutpos); + break; + case 27: + encode27(in, currentPos, code, out, tmpoutpos); + break; + case 28: + encode28(in, currentPos, code, out, tmpoutpos); + break; + case 29: + encode29(in, currentPos, code, out, tmpoutpos); + break; + case 30: + encode30(in, currentPos, code, out, tmpoutpos); + break; + case 31: + encode31(in, currentPos, code, out, tmpoutpos); + break; + case 32: + encode32(in, currentPos, code, out, tmpoutpos); + break; + case 33: + encode33(in, currentPos, code, out, tmpoutpos); + break; + case 34: + encode34(in, currentPos, code, out, tmpoutpos); + break; + case 35: + encode35(in, currentPos, code, out, tmpoutpos); + break; + case 36: + encode36(in, currentPos, code, out, tmpoutpos); + break; + case 37: + encode37(in, currentPos, code, out, tmpoutpos); + break; + case 38: + encode38(in, currentPos, code, out, tmpoutpos); + break; + case 39: + encode39(in, currentPos, code, out, tmpoutpos); + break; + case 40: + encode40(in, currentPos, code, out, tmpoutpos); + break; + case 41: + encode41(in, currentPos, code, out, tmpoutpos); + break; + case 42: + encode42(in, currentPos, code, out, tmpoutpos); + break; + case 43: + encode43(in, currentPos, code, out, tmpoutpos); + break; + case 44: + encode44(in, currentPos, code, out, tmpoutpos); + break; + case 45: + encode45(in, currentPos, code, out, tmpoutpos); + break; + case 46: + encode46(in, currentPos, code, out, tmpoutpos); + break; + case 47: + encode47(in, currentPos, code, out, tmpoutpos); + break; + case 48: + encode48(in, currentPos, code, out, tmpoutpos); + break; + case 49: + encode49(in, currentPos, code, out, tmpoutpos); + break; + case 50: + encode50(in, currentPos, code, out, tmpoutpos); + break; + case 51: + encode51(in, currentPos, code, out, 
tmpoutpos); + break; + case 52: + encode52(in, currentPos, code, out, tmpoutpos); + break; + case 53: + encode53(in, currentPos, code, out, tmpoutpos); + break; + case 54: + encode54(in, currentPos, code, out, tmpoutpos); + break; + case 55: + encode55(in, currentPos, code, out, tmpoutpos); + break; + case 56: + encode56(in, currentPos, code, out, tmpoutpos); + break; + case 57: + encode57(in, currentPos, code, out, tmpoutpos); + break; + case 58: + encode58(in, currentPos, code, out, tmpoutpos); + break; + case 59: + encode59(in, currentPos, code, out, tmpoutpos); + break; + case 60: + encode60(in, currentPos, code, out, tmpoutpos); + break; + case 61: + encode61(in, currentPos, code, out, tmpoutpos); + break; + case 62: + encode62(in, currentPos, code, out, tmpoutpos); + break; + case 63: + encode63(in, currentPos, code, out, tmpoutpos); + break; + case 64: + encode64(in, currentPos, code, out, tmpoutpos); + break; + case 65: + encode65(in, currentPos, code, out, tmpoutpos); + break; + case 66: + encode66(in, currentPos, code, out, tmpoutpos); + break; + case 67: + encode67(in, currentPos, code, out, tmpoutpos); + break; + case 68: + encode68(in, currentPos, code, out, tmpoutpos); + break; + case 69: + encode69(in, currentPos, code, out, tmpoutpos); + break; + case 70: + encode70(in, currentPos, code, out, tmpoutpos); + break; + case 71: + encode71(in, currentPos, code, out, tmpoutpos); + break; + case 72: + encode72(in, currentPos, code, out, tmpoutpos); + break; + case 73: + encode73(in, currentPos, code, out, tmpoutpos); + break; + case 74: + encode74(in, currentPos, code, out, tmpoutpos); + break; + case 75: + encode75(in, currentPos, code, out, tmpoutpos); + break; + case 76: + encode76(in, currentPos, code, out, tmpoutpos); + break; + case 77: + encode77(in, currentPos, code, out, tmpoutpos); + break; + case 78: + encode78(in, currentPos, code, out, tmpoutpos); + break; + case 79: + encode79(in, currentPos, code, out, tmpoutpos); + break; + case 80: + 
encode80(in, currentPos, code, out, tmpoutpos); + break; + default: + throw new RuntimeException("unsupported code"); + }// end switch + tmpoutpos += 2; + currentPos = nextCurrentPos; + } + + outer: while (currentPos < finalin) { + mainloop: for (int selector = 0; selector < 8; selector++) { + int res = 0; + int compressedNum = codeNum[selector]; + if (finalin <= currentPos + compressedNum - 1) + compressedNum = finalin - currentPos; + int b = bitLength[selector]; + int max = 1 << b; + int i = 0; + for (; i < compressedNum; i++) { + if (Util.smallerorequalthan(max, in[currentPos + i])) + continue mainloop; + res = (res << b) + in[currentPos + i]; + } + if (compressedNum != codeNum[selector]) { + res <<= (codeNum[selector] - compressedNum) * b; + } + res |= selector << 28; + out[tmpoutpos++] = res; + + currentPos += compressedNum; + continue outer; + } + final int selector = 8; + out[tmpoutpos++] = in[currentPos++] | (selector << 28); + } + inpos.set(currentPos); + outpos.set(tmpoutpos); + } + + @Override + public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos, int num) { + int currentPos = outpos.get(); + int tmpinpos = inpos.get(); + final int finalout = currentPos + num; + while (currentPos < finalout - 2 * 28) { + + int val = in[tmpinpos++]; + int valn = in[tmpinpos++]; + int header = val >>> 24; + switch (header) { + case 0: { + decode0(val, valn, out, currentPos); + currentPos+=56; + break; + } + case 1: { + decode1(val, valn, out, currentPos); + currentPos+=42; + break; + } + case 2: { + decode2(val, valn, out, currentPos); + currentPos+=37; + break; + } + case 3: { + decode3(val, valn, out, currentPos); + currentPos+=35; + break; + } + case 4: { + decode4(val, valn, out, currentPos); + currentPos+=33; + break; + } + case 5: { + decode5(val, valn, out, currentPos); + currentPos+=32; + break; + } + case 6: { + decode6(val, valn, out, currentPos); + currentPos+=31; + break; + } + case 7: { + decode7(val, valn, 
out, currentPos); + currentPos+=30; + break; + } + case 8: { + decode8(val, valn, out, currentPos); + currentPos+=29; + break; + } + case 9: { + decode9(val, valn, out, currentPos); + currentPos+=42; + break; + } + case 10: { + decode10(val, valn, out, currentPos); + currentPos+=28; + break; + } + case 11: { + decode11(val, valn, out, currentPos); + currentPos+=23; + break; + } + case 12: { + decode12(val, valn, out, currentPos); + currentPos+=21; + break; + } + case 13: { + decode13(val, valn, out, currentPos); + currentPos+=19; + break; + } + case 14: { + decode14(val, valn, out, currentPos); + currentPos+=18; + break; + } + case 15: { + decode15(val, valn, out, currentPos); + currentPos+=17; + break; + } + case 16: { + decode16(val, valn, out, currentPos); + currentPos+=16; + break; + } + case 17: { + decode17(val, valn, out, currentPos); + currentPos+=15; + break; + } + case 18: { + decode18(val, valn, out, currentPos); + currentPos+=37; + break; + } + case 19: { + decode19(val, valn, out, currentPos); + currentPos+=23; + break; + } + case 20: { + decode20(val, valn, out, currentPos); + currentPos+=18; + break; + } + case 21: { + decode21(val, valn, out, currentPos); + currentPos+=16; + break; + } + case 22: { + decode22(val, valn, out, currentPos); + currentPos+=14; + break; + } + case 23: { + decode23(val, valn, out, currentPos); + currentPos+=13; + break; + } + case 24: { + decode24(val, valn, out, currentPos); + currentPos+=12; + break; + } + case 25: { + decode25(val, valn, out, currentPos); + currentPos+=11; + break; + } + case 26: { + decode26(val, valn, out, currentPos); + currentPos+=10; + break; + } + case 27: { + decode27(val, valn, out, currentPos); + currentPos+=35; + break; + } + case 28: { + decode28(val, valn, out, currentPos); + currentPos+=21; + break; + } + case 29: { + decode29(val, valn, out, currentPos); + currentPos+=16; + break; + } + + case 30: { + decode30(val, valn, out, currentPos); + currentPos+=14; + break; + } + case 31: { + 
decode31(val, valn, out, currentPos); + currentPos+=12; + break; + } + case 32: { + decode32(val, valn, out, currentPos); + currentPos+=11; + break; + } + case 33: { + decode33(val, valn, out, currentPos); + currentPos+=10; + break; + } + case 34: { + decode34(val, valn, out, currentPos); + currentPos+=9; + break; + } + case 35: { + decode35(val, valn, out, currentPos); + currentPos+=8; + break; + } + case 36: { + decode36(val, valn, out, currentPos); + currentPos+=33; + break; + } + case 37: { + decode37(val, valn, out, currentPos); + currentPos+=19; + break; + } + case 38: { + decode38(val, valn, out, currentPos); + currentPos+=14; + break; + } + case 39: { + decode39(val, valn, out, currentPos); + currentPos+=12; + break; + } + case 40: { + decode40(val, valn, out, currentPos); + currentPos+=10; + break; + } + case 41: { + decode41(val, valn, out, currentPos); + currentPos+=9; + break; + } + case 42: { + decode42(val, valn, out, currentPos); + currentPos+=8; + break; + } + case 43: { + decode43(val, valn, out, currentPos); + currentPos+=7; + break; + } + case 44: { + decode44(val, valn, out, currentPos); + currentPos+=6; + break; + } + case 45: { + decode45(val, valn, out, currentPos); + currentPos+=32; + break; + } + case 46: { + decode46(val, valn, out, currentPos); + currentPos+=18; + break; + } + case 47: { + decode47(val, valn, out, currentPos); + currentPos+=13; + break; + } + case 48: { + decode48(val, valn, out, currentPos); + currentPos+=11; + break; + } + case 49: { + decode49(val, valn, out, currentPos); + currentPos+=9; + break; + } + case 50: { + decode50(val, valn, out, currentPos); + currentPos+=8; + break; + } + case 51: { + decode51(val, valn, out, currentPos); + currentPos+=7; + break; + } + case 52: { + decode52(val, valn, out, currentPos); + currentPos+=6; + break; + } + case 53: { + decode53(val, valn, out, currentPos); + currentPos+=5; + break; + } + case 54: { + decode54(val, valn, out, currentPos); + currentPos+=31; + break; + } + case 
55: { + decode55(val, valn, out, currentPos); + currentPos+=17; + break; + } + case 56: { + decode56(val, valn, out, currentPos); + currentPos+=12; + break; + } + case 57: { + decode57(val, valn, out, currentPos); + currentPos+=10; + break; + } + case 58: { + decode58(val, valn, out, currentPos); + currentPos+=8; + break; + } + case 59: { + decode59(val, valn, out, currentPos); + currentPos+=7; + break; + } + case 60: { + decode60(val, valn, out, currentPos); + currentPos+=6; + break; + } + case 61: { + decode61(val, valn, out, currentPos); + currentPos+=5; + break; + } + case 62: { + decode62(val, valn, out, currentPos); + currentPos+=4; + break; + } + case 63: { + decode63(val, valn, out, currentPos); + currentPos+=30; + break; + } + case 64: { + decode64(val, valn, out, currentPos); + currentPos+=16; + break; + } + case 65: { + decode65(val, valn, out, currentPos); + currentPos+=11; + break; + } + case 66: { + decode66(val, valn, out, currentPos); + currentPos+=9; + break; + } + case 67: { + decode67(val, valn, out, currentPos); + currentPos+=7; + break; + } + case 68: { + decode68(val, valn, out, currentPos); + currentPos+=6; + break; + } + case 69: { + decode69(val, valn, out, currentPos); + currentPos+=5; + break; + } + case 70: { + decode70(val, valn, out, currentPos); + currentPos+=4; + break; + } + case 71: { + decode71(val, valn, out, currentPos); + currentPos+=3; + break; + } + case 72: { + decode72(val, valn, out, currentPos); + currentPos+=29; + break; + } + case 73: { + decode73(val, valn, out, currentPos); + currentPos+=15; + break; + } + case 74: { + decode74(val, valn, out, currentPos); + currentPos+=10; + break; + } + case 75: { + decode75(val, valn, out, currentPos); + currentPos+=8; + break; + } + case 76: { + decode76(val, valn, out, currentPos); + currentPos+=6; + break; + } + case 77: { + decode77(val, valn, out, currentPos); + currentPos+=5; + break; + } + case 78: { + decode78(val, valn, out, currentPos); + currentPos+=4; + break; + } + 
case 79: { + decode79(val, valn, out, currentPos); + currentPos+=3; + break; + } + case 80: { + decode80(val, valn, out, currentPos); + currentPos+=2; + break; + } + default: + throw new RuntimeException("Wrong code: " + header); + }// end switch + } // end while + + while (currentPos < finalout) { + int val = in[tmpinpos++]; + int header = val >>> 28; + switch (header) { + case 0: { // number : 28, bitwidth : 1 + final int howmany = finalout - currentPos < 28 ? finalout - currentPos : 28; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (k + 4)) >>> 31; + } + break; + } + case 1: { // number : 14, bitwidth : 2 + final int howmany = finalout - currentPos < 14 ? finalout - currentPos : 14; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (2 * k + 4)) >>> 30; + } + break; + } + case 2: { // number : 9, bitwidth : 3 + final int howmany = finalout - currentPos < 9 ? finalout - currentPos : 9; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (3 * k + 5)) >>> 29; + } + break; + } + case 3: { // number : 7, bitwidth : 4 + final int howmany = finalout - currentPos < 7 ? finalout - currentPos : 7; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (4 * k + 4)) >>> 28; + } + break; + } + case 4: { // number : 5, bitwidth : 5 + final int howmany = finalout - currentPos < 5 ? finalout - currentPos : 5; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (5 * k + 7)) >>> 27; + } + break; + } + case 5: { // number : 4, bitwidth : 7 + final int howmany = finalout - currentPos < 4 ? finalout - currentPos : 4; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (7 * k + 4)) >>> 25; + } + break; + } + case 6: { // number : 3, bitwidth : 9 + final int howmany = finalout - currentPos < 3 ? 
finalout - currentPos : 3; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (9 * k + 5)) >>> 23; + } + break; + } + case 7: { // number : 2, bitwidth : 14 + final int howmany = finalout - currentPos < 2 ? finalout - currentPos : 2; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (14 * k + 4)) >>> 18; + } + break; + } + case 8: { // number : 1, bitwidth : 28 + out[currentPos++] = (val << 4) >>> 4; + break; + } + default: { + throw new RuntimeException("shouldn't happen"); + } + } + } + + outpos.set(finalout); + inpos.set(tmpinpos); + + } + + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + compressedPositions.add(inlength); + return inlength; + } +} diff --git a/src/main/java/me/lemire/integercompression/IntCompressor.java b/src/main/java/me/lemire/integercompression/IntCompressor.java index 87e7bde..30f755c 100644 --- a/src/main/java/me/lemire/integercompression/IntCompressor.java +++ b/src/main/java/me/lemire/integercompression/IntCompressor.java @@ -33,20 +33,14 @@ public IntCompressor() { * * @param input array to be compressed * @return compressed array - * @throws UncompressibleInputException if the data is too poorly compressible */ public int[] compress(int[] input) { - int [] compressed = new int[input.length + input.length / 100 + 1024]; + int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), input.length); + int[] compressed = new int[maxCompressedLength + 1]; // +1 to store the length of the input + // Store at index=0 the length of the input, hence enabling .headlessCompress compressed[0] = input.length; IntWrapper outpos = new IntWrapper(1); - try { - codec.headlessCompress(input, new IntWrapper(0), - input.length, compressed, outpos); - } catch (IndexOutOfBoundsException ioebe) { - throw new - UncompressibleInputException("Your input is too poorly compressible " - + "with the current codec : "+codec); - } + codec.headlessCompress(input, 
new IntWrapper(0), input.length, compressed, outpos); compressed = Arrays.copyOf(compressed,outpos.intValue()); return compressed; } @@ -58,6 +52,7 @@ public int[] compress(int[] input) { * @return uncompressed array */ public int[] uncompress(int[] compressed) { + // Read at index=0 the length of the input, hence enabling .headlessUncompress int[] decompressed = new int[compressed[0]]; IntWrapper inpos = new IntWrapper(1); codec.headlessUncompress(compressed, inpos, diff --git a/src/main/java/me/lemire/integercompression/IntegerCODEC.java b/src/main/java/me/lemire/integercompression/IntegerCODEC.java index 7929e48..1dd9a4c 100644 --- a/src/main/java/me/lemire/integercompression/IntegerCODEC.java +++ b/src/main/java/me/lemire/integercompression/IntegerCODEC.java @@ -18,14 +18,14 @@ public interface IntegerCODEC { * Compress data from an array to another array. * * Both inpos and outpos are modified to represent how much data was - * read and written to if 12 ints (inlength = 12) are compressed to 3 + * read and written to. If 12 ints (inlength = 12) are compressed to 3 * ints, then inpos will be incremented by 12 while outpos will be - * incremented by 3 we use IntWrapper to pass the values by reference. + * incremented by 3. We use IntWrapper to pass the values by reference. 
* * @param in * input array * @param inpos - * location in the input array + * where to start reading in the array * @param inlength * how many integers to compress * @param out @@ -52,7 +52,7 @@ public void compress(int[] in, IntWrapper inpos, int inlength, * @param out * array where to write the compressed output * @param outpos - * where to write the compressed output in out + * where to start writing the uncompressed output in out */ public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos); diff --git a/src/main/java/me/lemire/integercompression/JustCopy.java b/src/main/java/me/lemire/integercompression/JustCopy.java index 709b86a..f57282c 100644 --- a/src/main/java/me/lemire/integercompression/JustCopy.java +++ b/src/main/java/me/lemire/integercompression/JustCopy.java @@ -42,6 +42,12 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + compressedPositions.add(inlength); + return inlength; + } + @Override public void compress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { diff --git a/src/main/java/me/lemire/integercompression/Kamikaze.java b/src/main/java/me/lemire/integercompression/Kamikaze.java index fd1ac82..4cab30b 100644 --- a/src/main/java/me/lemire/integercompression/Kamikaze.java +++ b/src/main/java/me/lemire/integercompression/Kamikaze.java @@ -38,6 +38,11 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o } } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + throw new UnsupportedOperationException("Calculating the max compressed length is not supported yet."); + } + @Override public String toString() { return "Kamikaze's PForDelta"; @@ -64,4 +69,4 @@ public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out, headlessUncompress(in, inpos, inlength, out, 
outpos, outlength); } -} \ No newline at end of file +} diff --git a/src/main/java/me/lemire/integercompression/NewPFD.java b/src/main/java/me/lemire/integercompression/NewPFD.java index 6dd01aa..3da3002 100644 --- a/src/main/java/me/lemire/integercompression/NewPFD.java +++ b/src/main/java/me/lemire/integercompression/NewPFD.java @@ -132,6 +132,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, decodePage(in, inpos, out, outpos, mynvalue); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + int blockCount = inlength / BLOCK_SIZE; + // +1 for the header + // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers. + int maxBlockSizeInInts = 1 + BLOCK_SIZE; + compressedPositions.add(inlength); + return maxBlockSizeInInts * blockCount; + } + private void decodePage(int[] in, IntWrapper inpos, int[] out, IntWrapper outpos, int thissize) { int tmpoutpos = outpos.get(); diff --git a/src/main/java/me/lemire/integercompression/NewPFDS16.java b/src/main/java/me/lemire/integercompression/NewPFDS16.java index 98370d2..526b8fb 100644 --- a/src/main/java/me/lemire/integercompression/NewPFDS16.java +++ b/src/main/java/me/lemire/integercompression/NewPFDS16.java @@ -131,6 +131,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, decodePage(in, inpos, out, outpos, mynvalue); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + int blockCount = inlength / BLOCK_SIZE; + // +1 for the header + // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers. 
+ int maxBlockSizeInInts = 1 + BLOCK_SIZE; + compressedPositions.add(inlength); + return maxBlockSizeInInts * blockCount; + } + private void decodePage(int[] in, IntWrapper inpos, int[] out, IntWrapper outpos, int thissize) { int tmpoutpos = outpos.get(); diff --git a/src/main/java/me/lemire/integercompression/NewPFDS9.java b/src/main/java/me/lemire/integercompression/NewPFDS9.java index c8389c1..bd802b6 100644 --- a/src/main/java/me/lemire/integercompression/NewPFDS9.java +++ b/src/main/java/me/lemire/integercompression/NewPFDS9.java @@ -130,6 +130,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, decodePage(in, inpos, out, outpos, mynvalue); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + int blockCount = inlength / BLOCK_SIZE; + // +1 for the header + // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers. 
+ int maxBlockSizeInInts = 1 + BLOCK_SIZE; + compressedPositions.add(inlength); + return maxBlockSizeInInts * blockCount; + } + private void decodePage(int[] in, IntWrapper inpos, int[] out, IntWrapper outpos, int thissize) { int tmpoutpos = outpos.get(); diff --git a/src/main/java/me/lemire/integercompression/OptPFD.java b/src/main/java/me/lemire/integercompression/OptPFD.java index 8c90586..cfda92e 100644 --- a/src/main/java/me/lemire/integercompression/OptPFD.java +++ b/src/main/java/me/lemire/integercompression/OptPFD.java @@ -147,6 +147,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, decodePage(in, inpos, out, outpos, mynvalue); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + int blockCount = inlength / BLOCK_SIZE; + // +1 for the header + // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers. 
+ int maxBlockSizeInInts = 1 + BLOCK_SIZE; + compressedPositions.add(inlength); + return maxBlockSizeInInts * blockCount; + } + private void decodePage(int[] in, IntWrapper inpos, int[] out, IntWrapper outpos, int thissize) { int tmpoutpos = outpos.get(); diff --git a/src/main/java/me/lemire/integercompression/OptPFDS16.java b/src/main/java/me/lemire/integercompression/OptPFDS16.java index 8574b10..95c4f62 100644 --- a/src/main/java/me/lemire/integercompression/OptPFDS16.java +++ b/src/main/java/me/lemire/integercompression/OptPFDS16.java @@ -147,6 +147,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, decodePage(in, inpos, out, outpos, mynvalue); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + int blockCount = inlength / BLOCK_SIZE; + // +1 for the header + // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers. 
+ int maxBlockSizeInInts = 1 + BLOCK_SIZE; + compressedPositions.add(inlength); + return maxBlockSizeInInts * blockCount; + } + private void decodePage(int[] in, IntWrapper inpos, int[] out, IntWrapper outpos, int thissize) { int tmpoutpos = outpos.get(); @@ -197,4 +208,4 @@ public String toString() { return this.getClass().getSimpleName(); } -} \ No newline at end of file +} diff --git a/src/main/java/me/lemire/integercompression/OptPFDS9.java b/src/main/java/me/lemire/integercompression/OptPFDS9.java index 34f4206..0e2563b 100644 --- a/src/main/java/me/lemire/integercompression/OptPFDS9.java +++ b/src/main/java/me/lemire/integercompression/OptPFDS9.java @@ -146,6 +146,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, decodePage(in, inpos, out, outpos, mynvalue); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + int blockCount = inlength / BLOCK_SIZE; + // +1 for the header + // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers. 
+ int maxBlockSizeInInts = 1 + BLOCK_SIZE; + compressedPositions.add(inlength); + return maxBlockSizeInInts * blockCount; + } + private void decodePage(int[] in, IntWrapper inpos, int[] out, IntWrapper outpos, int thissize) { int tmpoutpos = outpos.get(); @@ -197,4 +208,4 @@ public String toString() { return this.getClass().getSimpleName(); } -} \ No newline at end of file +} diff --git a/src/main/java/me/lemire/integercompression/S16.java b/src/main/java/me/lemire/integercompression/S16.java index 08ffbc4..e40522d 100644 --- a/src/main/java/me/lemire/integercompression/S16.java +++ b/src/main/java/me/lemire/integercompression/S16.java @@ -15,191 +15,191 @@ */ public final class S16 { - /** - * Compress an integer array using Simple16 - * - * - * @param in - * array to compress - * @param currentPos - * where to start reading - * @param inlength - * how many integers to read - * @param out - * output array - * @param tmpoutpos - * location in the output array - * @return the number of 32-bit words written (in compressed form) - */ - public static int compress(final int[] in, int currentPos, int inlength, final int out[], final int tmpoutpos) { - int outpos = tmpoutpos; - final int finalin = currentPos + inlength; - while (currentPos < finalin) { - int inoffset = compressblock(out, outpos++, in, currentPos, inlength); - if (inoffset == -1) - throw new RuntimeException("Too big a number"); - currentPos += inoffset; - inlength -= inoffset; - } - return outpos - tmpoutpos; - } - - /** - * Estimate size of the compressed output. 
- * - * @param in - * array to compress - * @param currentPos - * where to start reading - * @param inlength - * how many integers to read - * @return estimated size of the output (in 32-bit integers) - */ - public static int estimatecompress(final int[] in, int currentPos, int inlength) { - final int finalin = currentPos + inlength; - int counter = 0; - while (currentPos < finalin) { - int inoffset = fakecompressblock(in, currentPos, inlength); - if (inoffset == -1) - throw new RuntimeException("Too big a number"); - currentPos += inoffset; - inlength -= inoffset; - ++counter; - } - return counter; - } - - /** - * Compress an integer array using Simple16 - * - * @param out - * the compressed output - * @param outOffset - * the offset of the output in the number of integers - * @param in - * the integer input array - * @param inOffset - * the offset of the input in the number of integers - * @param n - * the number of elements to be compressed - * @return the size of the outputs in 32-bit integers - * - */ - public static final int compressblock(int[] out, int outOffset, int[] in, int inOffset, int n) { - int numIdx, j, num, bits; - for (numIdx = 0; numIdx < S16_NUMSIZE; numIdx++) { - out[outOffset] = numIdx << S16_BITSSIZE; - num = (S16_NUM[numIdx] < n) ? S16_NUM[numIdx] : n; - - for (j = 0, bits = 0; (j < num) && (in[inOffset + j] < SHIFTED_S16_BITS[numIdx][j]);) { - out[outOffset] |= (in[inOffset + j] << bits); - bits += S16_BITS[numIdx][j]; - j++; - } - - if (j == num) { - return num; - } - } - - return -1; - } - - private static final int fakecompressblock(int[] in, int inOffset, int n) { - int numIdx, j, num; - for (numIdx = 0; numIdx < S16_NUMSIZE; numIdx++) { - num = (S16_NUM[numIdx] < n) ? 
S16_NUM[numIdx] : n; - - for (j = 0; (j < num) && (in[inOffset + j] < SHIFTED_S16_BITS[numIdx][j]);) { - j++; - } - - if (j == num) { - return num; - } - } - - return -1; - } - - /** - * Decompress an integer array using Simple16 - * - * @param out - * the decompressed output - * @param outOffset - * the offset of the output in the number of integers - * @param in - * the compressed input array - * @param inOffset - * the offset of the input in the number of integers - * @param n - * the number of elements to be compressed - * @return the number of processed integers - */ - public static final int decompressblock(int[] out, int outOffset, int[] in, int inOffset, int n) { - int numIdx, j = 0, bits = 0; - numIdx = in[inOffset] >>> S16_BITSSIZE; - int num = S16_NUM[numIdx] < n ? S16_NUM[numIdx] : n; - for (j = 0, bits = 0; j < num; j++) { - out[outOffset + j] = (in[inOffset] >>> bits) & (0xffffffff >>> (32 - S16_BITS[numIdx][j])); - bits += S16_BITS[numIdx][j]; - } - return num; - } - - /** - * Uncompressed data from an input array into an output array - * - * @param in - * input array (in compressed form) - * @param tmpinpos - * starting location in the compressed input array - * @param inlength - * how much data we wish the read (in 32-bit words) - * @param out - * output array (in decompressed form) - * @param currentPos - * current position in the output array - * @param outlength - * available data in the output array - */ - public static void uncompress(final int[] in, int tmpinpos, final int inlength, final int[] out, int currentPos, - int outlength) { - final int finalpos = tmpinpos + inlength; - while (tmpinpos < finalpos) { - final int howmany = decompressblock(out, currentPos, in, tmpinpos, outlength); - outlength -= howmany; - currentPos += howmany; - tmpinpos += 1; - } - - } - - private static int[][] shiftme(int[][] x) { - int[][] answer = new int[x.length][]; - for (int k = 0; k < x.length; ++k) { - answer[k] = new int[x[k].length]; - for (int z = 0; z 
< answer[k].length; ++z) - answer[k][z] = 1 << x[k][z]; - } - return answer; - } - - private static final int S16_NUMSIZE = 16; - private static final int S16_BITSSIZE = 28; - // the possible number of bits used to represent one integer - private static final int[] S16_NUM = { 28, 21, 21, 21, 14, 9, 8, 7, 6, 6, 5, 5, 4, 3, 2, 1 }; - // the corresponding number of elements for each value of the number of - // bits - private static final int[][] S16_BITS = { - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, - { 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, - { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1 }, - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2 }, - { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, { 4, 3, 3, 3, 3, 3, 3, 3, 3 }, { 3, 4, 4, 4, 4, 3, 3, 3 }, - { 4, 4, 4, 4, 4, 4, 4 }, { 5, 5, 5, 5, 4, 4 }, { 4, 4, 5, 5, 5, 5 }, { 6, 6, 6, 5, 5 }, { 5, 5, 6, 6, 6 }, - { 7, 7, 7, 7 }, { 10, 9, 9, }, { 14, 14 }, { 28 } }; - private static final int[][] SHIFTED_S16_BITS = shiftme(S16_BITS); + /** + * Compress an integer array using Simple16 + * + * + * @param in + * array to compress + * @param currentPos + * where to start reading + * @param inlength + * how many integers to read + * @param out + * output array + * @param tmpoutpos + * location in the output array + * @return the number of 32-bit words written (in compressed form) + */ + public static int compress(final int[] in, int currentPos, int inlength, final int out[], final int tmpoutpos) { + int outpos = tmpoutpos; + final int finalin = currentPos + inlength; + while (currentPos < finalin) { + int inoffset = compressblock(out, outpos++, in, currentPos, inlength); + if (inoffset == -1) + throw new RuntimeException("Too big a number"); + currentPos += inoffset; + inlength -= inoffset; + } + return outpos - tmpoutpos; + } + + /** + * Estimate size of the compressed output. 
+ * + * @param in + * array to compress + * @param currentPos + * where to start reading + * @param inlength + * how many integers to read + * @return estimated size of the output (in 32-bit integers) + */ + public static int estimatecompress(final int[] in, int currentPos, int inlength) { + final int finalin = currentPos + inlength; + int counter = 0; + while (currentPos < finalin) { + int inoffset = fakecompressblock(in, currentPos, inlength); + if (inoffset == -1) + throw new RuntimeException("Too big a number"); + currentPos += inoffset; + inlength -= inoffset; + ++counter; + } + return counter; + } + + /** + * Compress an integer array using Simple16 + * + * @param out + * the compressed output + * @param outOffset + * the offset of the output in the number of integers + * @param in + * the integer input array + * @param inOffset + * the offset of the input in the number of integers + * @param n + * the number of elements to be compressed + * @return the size of the outputs in 32-bit integers + * + */ + public static final int compressblock(int[] out, int outOffset, int[] in, int inOffset, int n) { + int numIdx, j, num, bits; + for (numIdx = 0; numIdx < S16_NUMSIZE; numIdx++) { + out[outOffset] = numIdx << S16_BITSSIZE; + num = (S16_NUM[numIdx] < n) ? S16_NUM[numIdx] : n; + + for (j = 0, bits = 0; (j < num) && (in[inOffset + j] < SHIFTED_S16_BITS[numIdx][j]);) { + out[outOffset] |= (in[inOffset + j] << bits); + bits += S16_BITS[numIdx][j]; + j++; + } + + if (j == num) { + return num; + } + } + + return -1; + } + + private static final int fakecompressblock(int[] in, int inOffset, int n) { + int numIdx, j, num; + for (numIdx = 0; numIdx < S16_NUMSIZE; numIdx++) { + num = (S16_NUM[numIdx] < n) ? 
S16_NUM[numIdx] : n; + + for (j = 0; (j < num) && (in[inOffset + j] < SHIFTED_S16_BITS[numIdx][j]);) { + j++; + } + + if (j == num) { + return num; + } + } + + return -1; + } + + /** + * Decompress an integer array using Simple16 + * + * @param out + * the decompressed output + * @param outOffset + * the offset of the output in the number of integers + * @param in + * the compressed input array + * @param inOffset + * the offset of the input in the number of integers + * @param n + * the number of elements to be compressed + * @return the number of processed integers + */ + public static final int decompressblock(int[] out, int outOffset, int[] in, int inOffset, int n) { + int numIdx, j = 0, bits = 0; + numIdx = in[inOffset] >>> S16_BITSSIZE; + int num = S16_NUM[numIdx] < n ? S16_NUM[numIdx] : n; + for (j = 0, bits = 0; j < num; j++) { + out[outOffset + j] = (in[inOffset] >>> bits) & (0xffffffff >>> (32 - S16_BITS[numIdx][j])); + bits += S16_BITS[numIdx][j]; + } + return num; + } + + /** + * Uncompressed data from an input array into an output array + * + * @param in + * input array (in compressed form) + * @param tmpinpos + * starting location in the compressed input array + * @param inlength + * how much data we wish the read (in 32-bit words) + * @param out + * output array (in decompressed form) + * @param currentPos + * current position in the output array + * @param outlength + * available data in the output array + */ + public static void uncompress(final int[] in, int tmpinpos, final int inlength, final int[] out, int currentPos, + int outlength) { + final int finalpos = tmpinpos + inlength; + while (tmpinpos < finalpos) { + final int howmany = decompressblock(out, currentPos, in, tmpinpos, outlength); + outlength -= howmany; + currentPos += howmany; + tmpinpos += 1; + } + + } + + private static int[][] shiftme(int[][] x) { + int[][] answer = new int[x.length][]; + for (int k = 0; k < x.length; ++k) { + answer[k] = new int[x[k].length]; + for (int z = 0; z 
< answer[k].length; ++z) + answer[k][z] = 1 << x[k][z]; + } + return answer; + } + + private static final int S16_NUMSIZE = 16; + private static final int S16_BITSSIZE = 28; + // the possible number of bits used to represent one integer + private static final int[] S16_NUM = { 28, 21, 21, 21, 14, 9, 8, 7, 6, 6, 5, 5, 4, 3, 2, 1 }; + // the corresponding number of elements for each value of the number of + // bits + private static final int[][] S16_BITS = { + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1 }, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2 }, + { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, { 4, 3, 3, 3, 3, 3, 3, 3, 3 }, { 3, 4, 4, 4, 4, 3, 3, 3 }, + { 4, 4, 4, 4, 4, 4, 4 }, { 5, 5, 5, 5, 4, 4 }, { 4, 4, 5, 5, 5, 5 }, { 6, 6, 6, 5, 5 }, { 5, 5, 6, 6, 6 }, + { 7, 7, 7, 7 }, { 10, 9, 9, }, { 14, 14 }, { 28 } }; + private static final int[][] SHIFTED_S16_BITS = shiftme(S16_BITS); } diff --git a/src/main/java/me/lemire/integercompression/S9.java b/src/main/java/me/lemire/integercompression/S9.java index 2180e5a..7e03e42 100644 --- a/src/main/java/me/lemire/integercompression/S9.java +++ b/src/main/java/me/lemire/integercompression/S9.java @@ -17,187 +17,187 @@ public final class S9 { - /** - * Estimate size of the compressed output. - * - * @param in - * array to compress - * @param currentPos - * where to start reading - * @param inlength - * how many integers to read - * @return estimated size of the output (in 32-bit integers) - */ - public static int estimatecompress(int[] in, int currentPos, int inlength) { - int tmpoutpos = 0; - int finalpos = currentPos + inlength; - outer: while (currentPos < finalpos) { - mainloop: for (int selector = 0; selector < 8; selector++) { + /** + * Estimate size of the compressed output. 
+ * + * @param in + * array to compress + * @param currentPos + * where to start reading + * @param inlength + * how many integers to read + * @return estimated size of the output (in 32-bit integers) + */ + public static int estimatecompress(int[] in, int currentPos, int inlength) { + int tmpoutpos = 0; + int finalpos = currentPos + inlength; + outer: while (currentPos < finalpos) { + mainloop: for (int selector = 0; selector < 8; selector++) { - int compressedNum = codeNum[selector]; - if (finalpos <= currentPos + compressedNum - 1) - compressedNum = finalpos - currentPos; - int b = bitLength[selector]; - int max = 1 << b; - int i = 0; - for (; i < compressedNum; i++) - if (Util.smallerorequalthan(max , in[currentPos + i])) - continue mainloop; - currentPos += compressedNum; - ++tmpoutpos; - continue outer; - } - final int selector = 8; - if (in[currentPos] >= 1 << bitLength[selector]) - throw new RuntimeException("Too big a number"); - tmpoutpos++; - currentPos++; + int compressedNum = codeNum[selector]; + if (finalpos <= currentPos + compressedNum - 1) + compressedNum = finalpos - currentPos; + int b = bitLength[selector]; + int max = 1 << b; + int i = 0; + for (; i < compressedNum; i++) + if (Util.smallerorequalthan(max , in[currentPos + i])) + continue mainloop; + currentPos += compressedNum; + ++tmpoutpos; + continue outer; + } + final int selector = 8; + if (in[currentPos] >= 1 << bitLength[selector]) + throw new RuntimeException("Too big a number"); + tmpoutpos++; + currentPos++; - } - return tmpoutpos; - } + } + return tmpoutpos; + } - /** - * Compress an integer array using Simple9 - * - * - * @param in - * array to compress - * @param currentPos - * where to start reading - * @param inlength - * how many integers to read - * @param out - * output array - * @param tmpoutpos - * location in the output array - * @return the number of 32-bit words written (in compressed form) - */ - public static int compress(int[] in, int currentPos, int inlength, int 
out[], int tmpoutpos) { - int origtmpoutpos = tmpoutpos; - int finalpos = currentPos + inlength; - outer: while (currentPos < finalpos) { - mainloop: for (int selector = 0; selector < 8; selector++) { - int res = 0; - int compressedNum = codeNum[selector]; - if (finalpos <= currentPos + compressedNum - 1) - compressedNum = finalpos - currentPos; - int b = bitLength[selector]; - int max = 1 << b; - int i = 0; - for (; i < compressedNum; i++) { - if (Util.smallerorequalthan(max, in[currentPos + i])) - continue mainloop; - res = (res << b) + in[currentPos + i]; - } - if (compressedNum != codeNum[selector]) - res <<= (codeNum[selector] - compressedNum) * b; - res |= selector << 28; - out[tmpoutpos++] = res; - currentPos += compressedNum; - continue outer; - } - final int selector = 8; - if (in[currentPos] >= 1 << bitLength[selector]) - throw new RuntimeException("Too big a number"); - out[tmpoutpos++] = in[currentPos++] | (selector << 28); - } - return tmpoutpos - origtmpoutpos; - } + /** + * Compress an integer array using Simple9 + * + * + * @param in + * array to compress + * @param currentPos + * where to start reading + * @param inlength + * how many integers to read + * @param out + * output array + * @param tmpoutpos + * location in the output array + * @return the number of 32-bit words written (in compressed form) + */ + public static int compress(int[] in, int currentPos, int inlength, int out[], int tmpoutpos) { + int origtmpoutpos = tmpoutpos; + int finalpos = currentPos + inlength; + outer: while (currentPos < finalpos) { + mainloop: for (int selector = 0; selector < 8; selector++) { + int res = 0; + int compressedNum = codeNum[selector]; + if (finalpos <= currentPos + compressedNum - 1) + compressedNum = finalpos - currentPos; + int b = bitLength[selector]; + int max = 1 << b; + int i = 0; + for (; i < compressedNum; i++) { + if (Util.smallerorequalthan(max, in[currentPos + i])) + continue mainloop; + res = (res << b) + in[currentPos + i]; + } + if 
(compressedNum != codeNum[selector]) + res <<= (codeNum[selector] - compressedNum) * b; + res |= selector << 28; + out[tmpoutpos++] = res; + currentPos += compressedNum; + continue outer; + } + final int selector = 8; + if (in[currentPos] >= 1 << bitLength[selector]) + throw new RuntimeException("Too big a number"); + out[tmpoutpos++] = in[currentPos++] | (selector << 28); + } + return tmpoutpos - origtmpoutpos; + } - /** - * Uncompressed data from an input array into an output array - * - * @param in - * input array (in compressed form) - * @param tmpinpos - * starting location in the compressed input array - * @param inlength - * how much data we wish the read (in 32-bit words) - * @param out - * output array (in decompressed form) - * @param currentPos - * current position in the output array - * @param outlength - * available data in the output array - */ - public static void uncompress(int[] in, int tmpinpos, int inlength, int[] out, int currentPos, int outlength) { - int finallength = currentPos + outlength; + /** + * Uncompressed data from an input array into an output array + * + * @param in + * input array (in compressed form) + * @param tmpinpos + * starting location in the compressed input array + * @param inlength + * how much data we wish the read (in 32-bit words) + * @param out + * output array (in decompressed form) + * @param currentPos + * current position in the output array + * @param outlength + * available data in the output array + */ + public static void uncompress(int[] in, int tmpinpos, int inlength, int[] out, int currentPos, int outlength) { + int finallength = currentPos + outlength; - while (currentPos < finallength) { - int val = in[tmpinpos++]; - int header = val >>> 28; - switch (header) { - case 0: { // number : 28, bitwidth : 1 - final int howmany = finallength - currentPos < 28 ? 
finallength - currentPos : 28; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (k + 4)) >>> 31; - } - break; - } - case 1: { // number : 14, bitwidth : 2 - final int howmany = finallength - currentPos < 14 ? finallength - currentPos : 14; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (2 * k + 4)) >>> 30; - } - break; - } - case 2: { // number : 9, bitwidth : 3 - final int howmany = finallength - currentPos < 9 ? finallength - currentPos : 9; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (3 * k + 5)) >>> 29; - } - break; - } - case 3: { // number : 7, bitwidth : 4 - final int howmany = finallength - currentPos < 7 ? finallength - currentPos : 7; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (4 * k + 4)) >>> 28; - } - break; - } - case 4: { // number : 5, bitwidth : 5 - final int howmany = finallength - currentPos < 5 ? finallength - currentPos : 5; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (5 * k + 7)) >>> 27; - } - break; - } - case 5: { // number : 4, bitwidth : 7 - final int howmany = finallength - currentPos < 4 ? finallength - currentPos : 4; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (7 * k + 4)) >>> 25; - } - break; - } - case 6: { // number : 3, bitwidth : 9 - final int howmany = finallength - currentPos < 3 ? finallength - currentPos : 3; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (9 * k + 5)) >>> 23; - } - break; - } - case 7: { // number : 2, bitwidth : 14 - final int howmany = finallength - currentPos < 2 ? 
finallength - currentPos : 2; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (14 * k + 4)) >>> 18; - } - break; - } - case 8: { // number : 1, bitwidth : 28 - out[currentPos++] = (val << 4) >>> 4; - break; - } - default: { - throw new RuntimeException("shouldn't happen"); - } - } - } + while (currentPos < finallength) { + int val = in[tmpinpos++]; + int header = val >>> 28; + switch (header) { + case 0: { // number : 28, bitwidth : 1 + final int howmany = finallength - currentPos < 28 ? finallength - currentPos : 28; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (k + 4)) >>> 31; + } + break; + } + case 1: { // number : 14, bitwidth : 2 + final int howmany = finallength - currentPos < 14 ? finallength - currentPos : 14; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (2 * k + 4)) >>> 30; + } + break; + } + case 2: { // number : 9, bitwidth : 3 + final int howmany = finallength - currentPos < 9 ? finallength - currentPos : 9; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (3 * k + 5)) >>> 29; + } + break; + } + case 3: { // number : 7, bitwidth : 4 + final int howmany = finallength - currentPos < 7 ? finallength - currentPos : 7; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (4 * k + 4)) >>> 28; + } + break; + } + case 4: { // number : 5, bitwidth : 5 + final int howmany = finallength - currentPos < 5 ? finallength - currentPos : 5; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (5 * k + 7)) >>> 27; + } + break; + } + case 5: { // number : 4, bitwidth : 7 + final int howmany = finallength - currentPos < 4 ? finallength - currentPos : 4; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (7 * k + 4)) >>> 25; + } + break; + } + case 6: { // number : 3, bitwidth : 9 + final int howmany = finallength - currentPos < 3 ? 
finallength - currentPos : 3; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (9 * k + 5)) >>> 23; + } + break; + } + case 7: { // number : 2, bitwidth : 14 + final int howmany = finallength - currentPos < 2 ? finallength - currentPos : 2; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (14 * k + 4)) >>> 18; + } + break; + } + case 8: { // number : 1, bitwidth : 28 + out[currentPos++] = (val << 4) >>> 4; + break; + } + default: { + throw new RuntimeException("shouldn't happen"); + } + } + } - } + } - private final static int bitLength[] = { 1, 2, 3, 4, 5, 7, 9, 14, 28 }; + private final static int bitLength[] = { 1, 2, 3, 4, 5, 7, 9, 14, 28 }; - private final static int codeNum[] = { 28, 14, 9, 7, 5, 4, 3, 2, 1 }; + private final static int codeNum[] = { 28, 14, 9, 7, 5, 4, 3, 2, 1 }; } diff --git a/src/main/java/me/lemire/integercompression/Simple16.java b/src/main/java/me/lemire/integercompression/Simple16.java index e0f9d5a..2b7f27f 100644 --- a/src/main/java/me/lemire/integercompression/Simple16.java +++ b/src/main/java/me/lemire/integercompression/Simple16.java @@ -13,173 +13,179 @@ */ public final class Simple16 implements IntegerCODEC, SkippableIntegerCODEC { - public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int out[], IntWrapper outpos) { - int i_inpos = inpos.get(); - int i_outpos = outpos.get(); - final int finalin = i_inpos + inlength; - while (i_inpos < finalin) { - int inoffset = compressblock(out, i_outpos++, in, i_inpos, inlength); - if (inoffset == -1) - throw new RuntimeException("Too big a number"); - i_inpos += inoffset; - inlength -= inoffset; - } - inpos.set(i_inpos); - outpos.set(i_outpos); - } - - /** - * Compress an integer array using Simple16 - * - * @param out - * the compressed output - * @param outOffset - * the offset of the output in the number of integers - * @param in - * the integer input array - * @param inOffset - * the offset of the input in the number of integers - 
* @param n - * the number of elements to be compressed - * @return the number of compressed integers - */ - public static final int compressblock(int[] out, int outOffset, int[] in, int inOffset, int n) { - int numIdx, j, num, bits; - for (numIdx = 0; numIdx < S16_NUMSIZE; numIdx++) { - out[outOffset] = numIdx << S16_BITSSIZE; - num = (S16_NUM[numIdx] < n) ? S16_NUM[numIdx] : n; - - for (j = 0, bits = 0; (j < num) && (in[inOffset + j] < SHIFTED_S16_BITS[numIdx][j]);) { - out[outOffset] |= (in[inOffset + j] << bits); - bits += S16_BITS[numIdx][j]; - j++; - } - - if (j == num) { - return num; - } - } - - return -1; - } - - /** - * Decompress an integer array using Simple16 - * - * @param out - * the decompressed output - * @param outOffset - * the offset of the output in the number of integers - * @param in - * the compressed input array - * @param inOffset - * the offset of the input in the number of integers - * @param n - * the number of elements to be compressed - * @return the number of processed integers - */ - public static final int decompressblock(int[] out, int outOffset, int[] in, int inOffset, int n) { - int numIdx, j = 0, bits = 0; - numIdx = in[inOffset] >>> S16_BITSSIZE; - int num = S16_NUM[numIdx] < n ? S16_NUM[numIdx] : n; - for (j = 0, bits = 0; j < num; j++) { - out[outOffset + j] = (in[inOffset] >>> bits) & (0xffffffff >>> (32 - S16_BITS[numIdx][j])); - bits += S16_BITS[numIdx][j]; - } - return num; - } - - @Override - public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos, int num) { - int i_inpos = inpos.get(); - int i_outpos = outpos.get(); - while (num > 0) { - final int howmany = decompressblock(out, i_outpos, in, i_inpos, num); - num -= howmany; - i_outpos += howmany; - i_inpos++; - } - inpos.set(i_inpos); - outpos.set(i_outpos); - } - - /** - * Uncompress data from an array to another array. - * - * Both inpos and outpos parameters are modified to indicate new positions - * after read/write. 
- * - * @param in - * array containing data in compressed form - * @param tmpinpos - * where to start reading in the array - * @param inlength - * length of the compressed data (ignored by some schemes) - * @param out - * array where to write the compressed output - * @param currentPos - * where to write the compressed output in out - * @param outlength - * number of integers we want to decode - */ - public static void uncompress(int[] in, int tmpinpos, int inlength, int[] out, int currentPos, int outlength) { - final int finalpos = tmpinpos + inlength; - while (tmpinpos < finalpos) { - final int howmany = decompressblock(out, currentPos, in, tmpinpos, outlength); - outlength -= howmany; - currentPos += howmany; - tmpinpos += 1; - } - - } - - private static int[][] shiftme(int[][] x) { - int[][] answer = new int[x.length][]; - for (int k = 0; k < x.length; ++k) { - answer[k] = new int[x[k].length]; - for (int z = 0; z < answer[k].length; ++z) - answer[k][z] = 1 << x[k][z]; - } - return answer; - } - - @Override - public void compress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { - if (inlength == 0) - return; - out[outpos.get()] = inlength; - outpos.increment(); - headlessCompress(in, inpos, inlength, out, outpos); - } - - @Override - public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { - if (inlength == 0) - return; - final int outlength = in[inpos.get()]; - inpos.increment(); - headlessUncompress(in, inpos, inlength, out, outpos, outlength); - - } - - @Override - public String toString() { - return this.getClass().getSimpleName(); - } - - private static final int S16_NUMSIZE = 16; - private static final int S16_BITSSIZE = 28; - // the possible number of bits used to represent one integer - private static final int[] S16_NUM = { 28, 21, 21, 21, 14, 9, 8, 7, 6, 6, 5, 5, 4, 3, 2, 1 }; - // the corresponding number of elements for each value of the number of bits - private static final int[][] 
S16_BITS = { - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, - { 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, - { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1 }, - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2 }, - { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, { 4, 3, 3, 3, 3, 3, 3, 3, 3 }, { 3, 4, 4, 4, 4, 3, 3, 3 }, - { 4, 4, 4, 4, 4, 4, 4 }, { 5, 5, 5, 5, 4, 4 }, { 4, 4, 5, 5, 5, 5 }, { 6, 6, 6, 5, 5 }, { 5, 5, 6, 6, 6 }, - { 7, 7, 7, 7 }, { 10, 9, 9, }, { 14, 14 }, { 28 } }; - private static final int[][] SHIFTED_S16_BITS = shiftme(S16_BITS); - -} \ No newline at end of file + public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int out[], IntWrapper outpos) { + int i_inpos = inpos.get(); + int i_outpos = outpos.get(); + final int finalin = i_inpos + inlength; + while (i_inpos < finalin) { + int inoffset = compressblock(out, i_outpos++, in, i_inpos, inlength); + if (inoffset == -1) + throw new RuntimeException("Too big a number"); + i_inpos += inoffset; + inlength -= inoffset; + } + inpos.set(i_inpos); + outpos.set(i_outpos); + } + + /** + * Compress an integer array using Simple16 + * + * @param out + * the compressed output + * @param outOffset + * the offset of the output in the number of integers + * @param in + * the integer input array + * @param inOffset + * the offset of the input in the number of integers + * @param n + * the number of elements to be compressed + * @return the number of compressed integers + */ + public static final int compressblock(int[] out, int outOffset, int[] in, int inOffset, int n) { + int numIdx, j, num, bits; + for (numIdx = 0; numIdx < S16_NUMSIZE; numIdx++) { + out[outOffset] = numIdx << S16_BITSSIZE; + num = (S16_NUM[numIdx] < n) ? 
S16_NUM[numIdx] : n; + + for (j = 0, bits = 0; (j < num) && (in[inOffset + j] < SHIFTED_S16_BITS[numIdx][j]);) { + out[outOffset] |= (in[inOffset + j] << bits); + bits += S16_BITS[numIdx][j]; + j++; + } + + if (j == num) { + return num; + } + } + + return -1; + } + + /** + * Decompress an integer array using Simple16 + * + * @param out + * the decompressed output + * @param outOffset + * the offset of the output in the number of integers + * @param in + * the compressed input array + * @param inOffset + * the offset of the input in the number of integers + * @param n + * the number of elements to be compressed + * @return the number of processed integers + */ + public static final int decompressblock(int[] out, int outOffset, int[] in, int inOffset, int n) { + int numIdx, j = 0, bits = 0; + numIdx = in[inOffset] >>> S16_BITSSIZE; + int num = S16_NUM[numIdx] < n ? S16_NUM[numIdx] : n; + for (j = 0, bits = 0; j < num; j++) { + out[outOffset + j] = (in[inOffset] >>> bits) & (0xffffffff >>> (32 - S16_BITS[numIdx][j])); + bits += S16_BITS[numIdx][j]; + } + return num; + } + + @Override + public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos, int num) { + int i_inpos = inpos.get(); + int i_outpos = outpos.get(); + while (num > 0) { + final int howmany = decompressblock(out, i_outpos, in, i_inpos, num); + num -= howmany; + i_outpos += howmany; + i_inpos++; + } + inpos.set(i_inpos); + outpos.set(i_outpos); + } + + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + compressedPositions.add(inlength); + return inlength; + } + + /** + * Uncompress data from an array to another array. + * + * Both inpos and outpos parameters are modified to indicate new positions + * after read/write. 
+ * + * @param in + * array containing data in compressed form + * @param tmpinpos + * where to start reading in the array + * @param inlength + * length of the compressed data (ignored by some schemes) + * @param out + * array where to write the compressed output + * @param currentPos + * where to write the compressed output in out + * @param outlength + * number of integers we want to decode + */ + public static void uncompress(int[] in, int tmpinpos, int inlength, int[] out, int currentPos, int outlength) { + final int finalpos = tmpinpos + inlength; + while (tmpinpos < finalpos) { + final int howmany = decompressblock(out, currentPos, in, tmpinpos, outlength); + outlength -= howmany; + currentPos += howmany; + tmpinpos += 1; + } + + } + + private static int[][] shiftme(int[][] x) { + int[][] answer = new int[x.length][]; + for (int k = 0; k < x.length; ++k) { + answer[k] = new int[x[k].length]; + for (int z = 0; z < answer[k].length; ++z) + answer[k][z] = 1 << x[k][z]; + } + return answer; + } + + @Override + public void compress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { + if (inlength == 0) + return; + out[outpos.get()] = inlength; + outpos.increment(); + headlessCompress(in, inpos, inlength, out, outpos); + } + + @Override + public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { + if (inlength == 0) + return; + final int outlength = in[inpos.get()]; + inpos.increment(); + headlessUncompress(in, inpos, inlength, out, outpos, outlength); + + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + private static final int S16_NUMSIZE = 16; + private static final int S16_BITSSIZE = 28; + // the possible number of bits used to represent one integer + private static final int[] S16_NUM = { 28, 21, 21, 21, 14, 9, 8, 7, 6, 6, 5, 5, 4, 3, 2, 1 }; + // the corresponding number of elements for each value of the number of bits + private static final int[][] 
S16_BITS = { + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1 }, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2 }, + { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, { 4, 3, 3, 3, 3, 3, 3, 3, 3 }, { 3, 4, 4, 4, 4, 3, 3, 3 }, + { 4, 4, 4, 4, 4, 4, 4 }, { 5, 5, 5, 5, 4, 4 }, { 4, 4, 5, 5, 5, 5 }, { 6, 6, 6, 5, 5 }, { 5, 5, 6, 6, 6 }, + { 7, 7, 7, 7 }, { 10, 9, 9, }, { 14, 14 }, { 28 } }; + private static final int[][] SHIFTED_S16_BITS = shiftme(S16_BITS); + +} diff --git a/src/main/java/me/lemire/integercompression/Simple9.java b/src/main/java/me/lemire/integercompression/Simple9.java index 032489d..fd5194d 100644 --- a/src/main/java/me/lemire/integercompression/Simple9.java +++ b/src/main/java/me/lemire/integercompression/Simple9.java @@ -20,280 +20,286 @@ public final class Simple9 implements IntegerCODEC, SkippableIntegerCODEC { - @Override - public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int out[], IntWrapper outpos) { - int tmpoutpos = outpos.get(); - int currentPos = inpos.get(); - final int finalin = currentPos + inlength; - outer: while (currentPos < finalin - 28) { - mainloop: for (int selector = 0; selector < 8; selector++) { + @Override + public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int out[], IntWrapper outpos) { + int tmpoutpos = outpos.get(); + int currentPos = inpos.get(); + final int finalin = currentPos + inlength; + outer: while (currentPos < finalin - 28) { + mainloop: for (int selector = 0; selector < 8; selector++) { - int res = 0; - int compressedNum = codeNum[selector]; - int b = bitLength[selector]; - int max = 1 << b; - int i = 0; - for (; i < compressedNum; i++) { - if (max <= in[currentPos + i]) - continue mainloop; - res = (res << b) + in[currentPos + i]; - } - res |= selector << 28; - out[tmpoutpos++] 
= res; - currentPos += compressedNum; - continue outer; - } - final int selector = 8; - if (in[currentPos] >= 1 << bitLength[selector]) - throw new RuntimeException("Too big a number"); - out[tmpoutpos++] = in[currentPos++] | (selector << 28); - } - outer: while (currentPos < finalin) { - mainloop: for (int selector = 0; selector < 8; selector++) { - int res = 0; - int compressedNum = codeNum[selector]; - if (finalin <= currentPos + compressedNum - 1) - compressedNum = finalin - currentPos; - int b = bitLength[selector]; - int max = 1 << b; - int i = 0; - for (; i < compressedNum; i++) { - if (max <= in[currentPos + i]) - continue mainloop; - res = (res << b) + in[currentPos + i]; - } + int res = 0; + int compressedNum = codeNum[selector]; + int b = bitLength[selector]; + int max = 1 << b; + int i = 0; + for (; i < compressedNum; i++) { + if (max <= in[currentPos + i]) + continue mainloop; + res = (res << b) + in[currentPos + i]; + } + res |= selector << 28; + out[tmpoutpos++] = res; + currentPos += compressedNum; + continue outer; + } + final int selector = 8; + if (in[currentPos] >= 1 << bitLength[selector]) + throw new RuntimeException("Too big a number"); + out[tmpoutpos++] = in[currentPos++] | (selector << 28); + } + outer: while (currentPos < finalin) { + mainloop: for (int selector = 0; selector < 8; selector++) { + int res = 0; + int compressedNum = codeNum[selector]; + if (finalin <= currentPos + compressedNum - 1) + compressedNum = finalin - currentPos; + int b = bitLength[selector]; + int max = 1 << b; + int i = 0; + for (; i < compressedNum; i++) { + if (max <= in[currentPos + i]) + continue mainloop; + res = (res << b) + in[currentPos + i]; + } - if (compressedNum != codeNum[selector]) - res <<= (codeNum[selector] - compressedNum) * b; - res |= selector << 28; - out[tmpoutpos++] = res; - currentPos += compressedNum; - continue outer; - } - final int selector = 8; - if (in[currentPos] >= 1 << bitLength[selector]) - throw new RuntimeException("Too big a 
number"); - out[tmpoutpos++] = in[currentPos++] | (selector << 28); - } - inpos.set(currentPos); - outpos.set(tmpoutpos); - } + if (compressedNum != codeNum[selector]) + res <<= (codeNum[selector] - compressedNum) * b; + res |= selector << 28; + out[tmpoutpos++] = res; + currentPos += compressedNum; + continue outer; + } + final int selector = 8; + if (in[currentPos] >= 1 << bitLength[selector]) + throw new RuntimeException("Too big a number"); + out[tmpoutpos++] = in[currentPos++] | (selector << 28); + } + inpos.set(currentPos); + outpos.set(tmpoutpos); + } - @Override - public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos, - int outlength) { - int currentPos = outpos.get(); - int tmpinpos = inpos.get(); - final int finalout = currentPos + outlength; - while (currentPos < finalout - 28) { - int val = in[tmpinpos++]; - int header = val >>> 28; - switch (header) { - case 0: { // number : 28, bitwidth : 1 - out[currentPos++] = (val << 4) >>> 31; - out[currentPos++] = (val << 5) >>> 31; - out[currentPos++] = (val << 6) >>> 31; - out[currentPos++] = (val << 7) >>> 31; - out[currentPos++] = (val << 8) >>> 31; - out[currentPos++] = (val << 9) >>> 31; - out[currentPos++] = (val << 10) >>> 31; - out[currentPos++] = (val << 11) >>> 31; - out[currentPos++] = (val << 12) >>> 31; - out[currentPos++] = (val << 13) >>> 31; // 10 - out[currentPos++] = (val << 14) >>> 31; - out[currentPos++] = (val << 15) >>> 31; - out[currentPos++] = (val << 16) >>> 31; - out[currentPos++] = (val << 17) >>> 31; - out[currentPos++] = (val << 18) >>> 31; - out[currentPos++] = (val << 19) >>> 31; - out[currentPos++] = (val << 20) >>> 31; - out[currentPos++] = (val << 21) >>> 31; - out[currentPos++] = (val << 22) >>> 31; - out[currentPos++] = (val << 23) >>> 31; // 20 - out[currentPos++] = (val << 24) >>> 31; - out[currentPos++] = (val << 25) >>> 31; - out[currentPos++] = (val << 26) >>> 31; - out[currentPos++] = (val << 27) >>> 31; - out[currentPos++] 
= (val << 28) >>> 31; - out[currentPos++] = (val << 29) >>> 31; - out[currentPos++] = (val << 30) >>> 31; - out[currentPos++] = (val << 31) >>> 31; - break; - } - case 1: { // number : 14, bitwidth : 2 - out[currentPos++] = (val << 4) >>> 30; - out[currentPos++] = (val << 6) >>> 30; - out[currentPos++] = (val << 8) >>> 30; - out[currentPos++] = (val << 10) >>> 30; - out[currentPos++] = (val << 12) >>> 30; - out[currentPos++] = (val << 14) >>> 30; - out[currentPos++] = (val << 16) >>> 30; - out[currentPos++] = (val << 18) >>> 30; - out[currentPos++] = (val << 20) >>> 30; - out[currentPos++] = (val << 22) >>> 30; // 10 - out[currentPos++] = (val << 24) >>> 30; - out[currentPos++] = (val << 26) >>> 30; - out[currentPos++] = (val << 28) >>> 30; - out[currentPos++] = (val << 30) >>> 30; - break; - } - case 2: { // number : 9, bitwidth : 3 - out[currentPos++] = (val << 5) >>> 29; - out[currentPos++] = (val << 8) >>> 29; - out[currentPos++] = (val << 11) >>> 29; - out[currentPos++] = (val << 14) >>> 29; - out[currentPos++] = (val << 17) >>> 29; - out[currentPos++] = (val << 20) >>> 29; - out[currentPos++] = (val << 23) >>> 29; - out[currentPos++] = (val << 26) >>> 29; - out[currentPos++] = (val << 29) >>> 29; - break; - } - case 3: { // number : 7, bitwidth : 4 - out[currentPos++] = (val << 4) >>> 28; - out[currentPos++] = (val << 8) >>> 28; - out[currentPos++] = (val << 12) >>> 28; - out[currentPos++] = (val << 16) >>> 28; - out[currentPos++] = (val << 20) >>> 28; - out[currentPos++] = (val << 24) >>> 28; - out[currentPos++] = (val << 28) >>> 28; - break; - } - case 4: { // number : 5, bitwidth : 5 - out[currentPos++] = (val << 7) >>> 27; - out[currentPos++] = (val << 12) >>> 27; - out[currentPos++] = (val << 17) >>> 27; - out[currentPos++] = (val << 22) >>> 27; - out[currentPos++] = (val << 27) >>> 27; - break; - } - case 5: { // number : 4, bitwidth : 7 - out[currentPos++] = (val << 4) >>> 25; - out[currentPos++] = (val << 11) >>> 25; - out[currentPos++] = (val << 18) 
>>> 25; - out[currentPos++] = (val << 25) >>> 25; - break; - } - case 6: { // number : 3, bitwidth : 9 - out[currentPos++] = (val << 5) >>> 23; - out[currentPos++] = (val << 14) >>> 23; - out[currentPos++] = (val << 23) >>> 23; - break; - } - case 7: { // number : 2, bitwidth : 14 - out[currentPos++] = (val << 4) >>> 18; - out[currentPos++] = (val << 18) >>> 18; - break; - } - case 8: { // number : 1, bitwidth : 28 - out[currentPos++] = (val << 4) >>> 4; - break; - } - default: { - throw new RuntimeException("shouldn't happen: limited to 28-bit integers"); - } - } - } - while (currentPos < finalout) { - int val = in[tmpinpos++]; - int header = val >>> 28; - switch (header) { - case 0: { // number : 28, bitwidth : 1 - final int howmany = finalout - currentPos; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (k + 4)) >>> 31; - } - break; - } - case 1: { // number : 14, bitwidth : 2 - final int howmany = finalout - currentPos < 14 ? finalout - currentPos : 14; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (2 * k + 4)) >>> 30; - } - break; - } - case 2: { // number : 9, bitwidth : 3 - final int howmany = finalout - currentPos < 9 ? finalout - currentPos : 9; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (3 * k + 5)) >>> 29; - } - break; - } - case 3: { // number : 7, bitwidth : 4 - final int howmany = finalout - currentPos < 7 ? finalout - currentPos : 7; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (4 * k + 4)) >>> 28; - } - break; - } - case 4: { // number : 5, bitwidth : 5 - final int howmany = finalout - currentPos < 5 ? finalout - currentPos : 5; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (5 * k + 7)) >>> 27; - } - break; - } - case 5: { // number : 4, bitwidth : 7 - final int howmany = finalout - currentPos < 4 ? 
finalout - currentPos : 4; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (7 * k + 4)) >>> 25; - } - break; - } - case 6: { // number : 3, bitwidth : 9 - final int howmany = finalout - currentPos < 3 ? finalout - currentPos : 3; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (9 * k + 5)) >>> 23; - } - break; - } - case 7: { // number : 2, bitwidth : 14 - final int howmany = finalout - currentPos < 2 ? finalout - currentPos : 2; - for (int k = 0; k < howmany; ++k) { - out[currentPos++] = (val << (14 * k + 4)) >>> 18; - } - break; - } - case 8: { // number : 1, bitwidth : 28 - out[currentPos++] = (val << 4) >>> 4; - break; - } - default: { - throw new RuntimeException("shouldn't happen"); - } - } - } - outpos.set(currentPos); - inpos.set(tmpinpos); + @Override + public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos, + int outlength) { + int currentPos = outpos.get(); + int tmpinpos = inpos.get(); + final int finalout = currentPos + outlength; + while (currentPos < finalout - 28) { + int val = in[tmpinpos++]; + int header = val >>> 28; + switch (header) { + case 0: { // number : 28, bitwidth : 1 + out[currentPos++] = (val << 4) >>> 31; + out[currentPos++] = (val << 5) >>> 31; + out[currentPos++] = (val << 6) >>> 31; + out[currentPos++] = (val << 7) >>> 31; + out[currentPos++] = (val << 8) >>> 31; + out[currentPos++] = (val << 9) >>> 31; + out[currentPos++] = (val << 10) >>> 31; + out[currentPos++] = (val << 11) >>> 31; + out[currentPos++] = (val << 12) >>> 31; + out[currentPos++] = (val << 13) >>> 31; // 10 + out[currentPos++] = (val << 14) >>> 31; + out[currentPos++] = (val << 15) >>> 31; + out[currentPos++] = (val << 16) >>> 31; + out[currentPos++] = (val << 17) >>> 31; + out[currentPos++] = (val << 18) >>> 31; + out[currentPos++] = (val << 19) >>> 31; + out[currentPos++] = (val << 20) >>> 31; + out[currentPos++] = (val << 21) >>> 31; + out[currentPos++] = (val << 22) >>> 31; + 
out[currentPos++] = (val << 23) >>> 31; // 20 + out[currentPos++] = (val << 24) >>> 31; + out[currentPos++] = (val << 25) >>> 31; + out[currentPos++] = (val << 26) >>> 31; + out[currentPos++] = (val << 27) >>> 31; + out[currentPos++] = (val << 28) >>> 31; + out[currentPos++] = (val << 29) >>> 31; + out[currentPos++] = (val << 30) >>> 31; + out[currentPos++] = (val << 31) >>> 31; + break; + } + case 1: { // number : 14, bitwidth : 2 + out[currentPos++] = (val << 4) >>> 30; + out[currentPos++] = (val << 6) >>> 30; + out[currentPos++] = (val << 8) >>> 30; + out[currentPos++] = (val << 10) >>> 30; + out[currentPos++] = (val << 12) >>> 30; + out[currentPos++] = (val << 14) >>> 30; + out[currentPos++] = (val << 16) >>> 30; + out[currentPos++] = (val << 18) >>> 30; + out[currentPos++] = (val << 20) >>> 30; + out[currentPos++] = (val << 22) >>> 30; // 10 + out[currentPos++] = (val << 24) >>> 30; + out[currentPos++] = (val << 26) >>> 30; + out[currentPos++] = (val << 28) >>> 30; + out[currentPos++] = (val << 30) >>> 30; + break; + } + case 2: { // number : 9, bitwidth : 3 + out[currentPos++] = (val << 5) >>> 29; + out[currentPos++] = (val << 8) >>> 29; + out[currentPos++] = (val << 11) >>> 29; + out[currentPos++] = (val << 14) >>> 29; + out[currentPos++] = (val << 17) >>> 29; + out[currentPos++] = (val << 20) >>> 29; + out[currentPos++] = (val << 23) >>> 29; + out[currentPos++] = (val << 26) >>> 29; + out[currentPos++] = (val << 29) >>> 29; + break; + } + case 3: { // number : 7, bitwidth : 4 + out[currentPos++] = (val << 4) >>> 28; + out[currentPos++] = (val << 8) >>> 28; + out[currentPos++] = (val << 12) >>> 28; + out[currentPos++] = (val << 16) >>> 28; + out[currentPos++] = (val << 20) >>> 28; + out[currentPos++] = (val << 24) >>> 28; + out[currentPos++] = (val << 28) >>> 28; + break; + } + case 4: { // number : 5, bitwidth : 5 + out[currentPos++] = (val << 7) >>> 27; + out[currentPos++] = (val << 12) >>> 27; + out[currentPos++] = (val << 17) >>> 27; + out[currentPos++] 
= (val << 22) >>> 27; + out[currentPos++] = (val << 27) >>> 27; + break; + } + case 5: { // number : 4, bitwidth : 7 + out[currentPos++] = (val << 4) >>> 25; + out[currentPos++] = (val << 11) >>> 25; + out[currentPos++] = (val << 18) >>> 25; + out[currentPos++] = (val << 25) >>> 25; + break; + } + case 6: { // number : 3, bitwidth : 9 + out[currentPos++] = (val << 5) >>> 23; + out[currentPos++] = (val << 14) >>> 23; + out[currentPos++] = (val << 23) >>> 23; + break; + } + case 7: { // number : 2, bitwidth : 14 + out[currentPos++] = (val << 4) >>> 18; + out[currentPos++] = (val << 18) >>> 18; + break; + } + case 8: { // number : 1, bitwidth : 28 + out[currentPos++] = (val << 4) >>> 4; + break; + } + default: { + throw new RuntimeException("shouldn't happen: limited to 28-bit integers"); + } + } + } + while (currentPos < finalout) { + int val = in[tmpinpos++]; + int header = val >>> 28; + switch (header) { + case 0: { // number : 28, bitwidth : 1 + final int howmany = finalout - currentPos; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (k + 4)) >>> 31; + } + break; + } + case 1: { // number : 14, bitwidth : 2 + final int howmany = finalout - currentPos < 14 ? finalout - currentPos : 14; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (2 * k + 4)) >>> 30; + } + break; + } + case 2: { // number : 9, bitwidth : 3 + final int howmany = finalout - currentPos < 9 ? finalout - currentPos : 9; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (3 * k + 5)) >>> 29; + } + break; + } + case 3: { // number : 7, bitwidth : 4 + final int howmany = finalout - currentPos < 7 ? finalout - currentPos : 7; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (4 * k + 4)) >>> 28; + } + break; + } + case 4: { // number : 5, bitwidth : 5 + final int howmany = finalout - currentPos < 5 ? 
finalout - currentPos : 5; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (5 * k + 7)) >>> 27; + } + break; + } + case 5: { // number : 4, bitwidth : 7 + final int howmany = finalout - currentPos < 4 ? finalout - currentPos : 4; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (7 * k + 4)) >>> 25; + } + break; + } + case 6: { // number : 3, bitwidth : 9 + final int howmany = finalout - currentPos < 3 ? finalout - currentPos : 3; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (9 * k + 5)) >>> 23; + } + break; + } + case 7: { // number : 2, bitwidth : 14 + final int howmany = finalout - currentPos < 2 ? finalout - currentPos : 2; + for (int k = 0; k < howmany; ++k) { + out[currentPos++] = (val << (14 * k + 4)) >>> 18; + } + break; + } + case 8: { // number : 1, bitwidth : 28 + out[currentPos++] = (val << 4) >>> 4; + break; + } + default: { + throw new RuntimeException("shouldn't happen"); + } + } + } + outpos.set(currentPos); + inpos.set(tmpinpos); - } + } - @Override - public void compress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { - if (inlength == 0) - return; - out[outpos.get()] = inlength; - outpos.increment(); - headlessCompress(in, inpos, inlength, out, outpos); - } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + compressedPositions.add(inlength); + return inlength; + } - @Override - public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { - if (inlength == 0) - return; - final int outlength = in[inpos.get()]; - inpos.increment(); - headlessUncompress(in, inpos, inlength, out, outpos, outlength); + @Override + public void compress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { + if (inlength == 0) + return; + out[outpos.get()] = inlength; + outpos.increment(); + headlessCompress(in, inpos, inlength, out, outpos); + } - } + @Override + public void 
uncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { + if (inlength == 0) + return; + final int outlength = in[inpos.get()]; + inpos.increment(); + headlessUncompress(in, inpos, inlength, out, outpos, outlength); - private final static int bitLength[] = { 1, 2, 3, 4, 5, 7, 9, 14, 28 }; + } - private final static int codeNum[] = { 28, 14, 9, 7, 5, 4, 3, 2, 1 }; + private final static int bitLength[] = { 1, 2, 3, 4, 5, 7, 9, 14, 28 }; - @Override - public String toString() { - return this.getClass().getSimpleName(); - } + private final static int codeNum[] = { 28, 14, 9, 7, 5, 4, 3, 2, 1 }; + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } } diff --git a/src/main/java/me/lemire/integercompression/SkippableComposition.java b/src/main/java/me/lemire/integercompression/SkippableComposition.java index a235c47..fc3c18e 100644 --- a/src/main/java/me/lemire/integercompression/SkippableComposition.java +++ b/src/main/java/me/lemire/integercompression/SkippableComposition.java @@ -52,15 +52,27 @@ public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int[] out public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos, int num) { int init = inpos.get(); + int outposInit = outpos.get(); + F1.headlessUncompress(in, inpos, inlength, out, outpos, num); if (inpos.get() == init) { - inpos.increment(); + inpos.increment(); } inlength -= inpos.get() - init; - num -= outpos.get(); + num -= outpos.get() - outposInit; F2.headlessUncompress(in, inpos, inlength, out, outpos, num); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + int init = compressedPositions.get(); + int maxLength = F1.maxHeadlessCompressedLength(compressedPositions, inlength); + maxLength += 1; // Add +1 for the potential F2 header. Question: is this header actually needed in the headless version? 
+ inlength -= compressedPositions.get() - init; + maxLength += F2.maxHeadlessCompressedLength(compressedPositions, inlength); + return maxLength; + } + @Override public String toString() { return F1.toString() + "+" + F2.toString(); diff --git a/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java b/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java index c10d2f0..b9bdc04 100644 --- a/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java +++ b/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java @@ -10,10 +10,11 @@ /** * Interface describing a standard CODEC to compress integers. This is a - * variation on the IntegerCODEC interface meant to be used for random access. + * variation on the IntegerCODEC interface meant to be used for random access + * (i.e., given a large array, you can segment it and decode just the subarray you need). * - * The main difference is that we must specify the number of integers we wish to - * decode. This information should be stored elsewhere. + * The main difference is that you must specify the number of integers you wish to + * uncompress. This information should be stored elsewhere. * * This interface was designed by the Terrier team for their search engine. * @@ -25,14 +26,17 @@ public interface SkippableIntegerCODEC { * Compress data from an array to another array. * * Both inpos and outpos are modified to represent how much data was read - * and written to if 12 ints (inlength = 12) are compressed to 3 ints, then - * inpos will be incremented by 12 while outpos will be incremented by 3 we + * and written to. If 12 ints (inlength = 12) are compressed to 3 ints, then + * inpos will be incremented by 12 while outpos will be incremented by 3. We * use IntWrapper to pass the values by reference. * + * Implementation note: contrary to {@link IntegerCODEC#compress}, + * this may skip writing information about the number of encoded integers. 
+ * * @param in * input array * @param inpos - * location in the input array + * where to start reading in the array * @param inlength * how many integers to compress * @param out @@ -56,13 +60,30 @@ public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int[] out * @param inlength * length of the compressed data (ignored by some schemes) * @param out - * array where to write the compressed output + * array where to write the uncompressed output * @param outpos - * where to write the compressed output in out + * where to start writing the uncompressed output in out * @param num - * number of integers we want to decode, the actual number of integers decoded can be less + * number of integers we want to decode. May be less than the actual number of compressed integers */ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos, int num); + /** + * Compute the maximum number of integers that might be required to store + * the compressed form of a given input array segment, without headers. + *

<p>
+ * This is useful to pre-allocate the output buffer before calling + * {@link #headlessCompress(int[], IntWrapper, int, int[], IntWrapper)}. + *
</p>
+ * + * @param compressedPositions + * since not all schemes compress every input integer, this parameter + * returns how many input integers will actually be compressed. + * This is useful when composing multiple schemes. + * @param inlength + * number of integers to be compressed + * @return the maximum number of integers needed in the output array + */ + int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength); } diff --git a/src/main/java/me/lemire/integercompression/UncompressibleInputException.java b/src/main/java/me/lemire/integercompression/UncompressibleInputException.java deleted file mode 100644 index c490946..0000000 --- a/src/main/java/me/lemire/integercompression/UncompressibleInputException.java +++ /dev/null @@ -1,19 +0,0 @@ -package me.lemire.integercompression; - -/** - * This exception might be thrown if the input is poorly compressible. - * - */ -public class UncompressibleInputException extends RuntimeException { - - /** - * Create new exception - * @param string explanation for the exception - */ - public UncompressibleInputException(String string) { - super(string); - } - - private static final long serialVersionUID = -798583799846489873L; - -} diff --git a/src/main/java/me/lemire/integercompression/Util.java b/src/main/java/me/lemire/integercompression/Util.java index 346e3b2..63fc918 100644 --- a/src/main/java/me/lemire/integercompression/Util.java +++ b/src/main/java/me/lemire/integercompression/Util.java @@ -15,13 +15,13 @@ public final class Util { - - // check whether x is small than y as unsigned ints (supported by Java 8 natively); - protected static final boolean smallerorequalthan(int x, int y) { - return (x + Integer.MIN_VALUE) <= (y + Integer.MIN_VALUE); - } - - /** + + // check whether x is smaller than or equal to y as unsigned ints (supported by Java 8 natively); + protected static final boolean smallerorequalthan(int x, int y) { + return (x + Integer.MIN_VALUE) <= (y + Integer.MIN_VALUE); + } + + /** * Compute the
maximum of the integer logarithms (ceil(log(x+1)) of a range * of value * diff --git a/src/main/java/me/lemire/integercompression/VariableByte.java b/src/main/java/me/lemire/integercompression/VariableByte.java index 5b25c43..c9b04d0 100644 --- a/src/main/java/me/lemire/integercompression/VariableByte.java +++ b/src/main/java/me/lemire/integercompression/VariableByte.java @@ -21,6 +21,8 @@ */ public class VariableByte implements IntegerCODEC, ByteIntegerCODEC, SkippableIntegerCODEC { + private static final int MAX_BYTES_PER_INT = 5; + private static byte extract7bits(int i, long val) { return (byte) ((val >> (7 * i)) & ((1 << 7) - 1)); } @@ -122,8 +124,11 @@ public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out, for (int v = 0, shift = 0; p < finalp;) { val = in[p]; int c = (byte) (val >>> s); + // Shift to next byte s += 8; + // Shift to next integer if s==32 p += s>>5; + // cycle from 31 to 0 s = s & 31; v += ((c & 127) << shift); if ((c & 128) == 128) { @@ -187,8 +192,11 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o for (int v = 0, shift = 0; tmpoutpos < finaloutpos;) { val = in[p]; int c = val >>> s; + // Shift to next byte s += 8; + // Shift to next integer if s==32 p += s>>5; + // cycle from 31 to 0 s = s & 31; v += ((c & 127) << shift); if ((c & 128) == 128) { @@ -202,12 +210,23 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o inpos.set(p + (s!=0 ? 1 : 0)); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + int maxLengthInBytes = inlength * MAX_BYTES_PER_INT; + int maxLengthInInts = (maxLengthInBytes + Integer.BYTES - 1) / Integer.BYTES; + compressedPositions.add(inlength); + return maxLengthInInts; + } + /** * Creates a new buffer of the requested size. * * In case you need a different way to allocate buffers, you can override this method * with a custom behavior. 
The default implementation allocates a new Java direct * {@link ByteBuffer} on each invocation. + * + * @param sizeInBytes size of the requested buffer, in bytes + * @return a buffer of the requested size */ protected ByteBuffer makeBuffer(int sizeInBytes) { return ByteBuffer.allocateDirect(sizeInBytes); diff --git a/src/main/java/me/lemire/integercompression/benchmarktools/Benchmark.java b/src/main/java/me/lemire/integercompression/benchmarktools/Benchmark.java index c5fee69..ef4a386 100644 --- a/src/main/java/me/lemire/integercompression/benchmarktools/Benchmark.java +++ b/src/main/java/me/lemire/integercompression/benchmarktools/Benchmark.java @@ -308,10 +308,10 @@ private static void testByteCodec(PrintWriter csvLog, int sparsity, public static void main(String args[]) throws FileNotFoundException { System.out .println("# benchmark based on the ClusterData model from:"); - System.out.println("# Vo Ngoc Anh and Alistair Moffat. "); - System.out.println("# Index compression using 64-bit words."); + System.out.println("# Vo Ngoc Anh and Alistair Moffat. "); + System.out.println("# Index compression using 64-bit words."); System.out - .println("# Softw. Pract. Exper.40, 2 (February 2010), 131-147. "); + .println("# Softw. Pract. Exper.40, 2 (February 2010), 131-147. 
"); System.out.println(); PrintWriter writer = null; diff --git a/src/main/java/me/lemire/integercompression/benchmarktools/BenchmarkSkippable.java b/src/main/java/me/lemire/integercompression/benchmarktools/BenchmarkSkippable.java index 58bbc4a..b930568 100644 --- a/src/main/java/me/lemire/integercompression/benchmarktools/BenchmarkSkippable.java +++ b/src/main/java/me/lemire/integercompression/benchmarktools/BenchmarkSkippable.java @@ -83,7 +83,6 @@ private static int decompressFromSkipTable(Object c, int[] compressed, if (num > length - uncomppos.get()) num = length - uncomppos.get(); int location = metadata[metapos++]; - // System.out.println("location = "+location); int initvalue = metadata[metapos++]; int outputlocation = uncomppos.get(); if (location != compressedpos.get()) @@ -242,10 +241,10 @@ private static void testCodec(PrintWriter csvLog, int sparsity, Object c, */ public static void main(String args[]) throws FileNotFoundException { System.out.println("# benchmark based on the ClusterData model from:"); - System.out.println("# Vo Ngoc Anh and Alistair Moffat. "); - System.out.println("# Index compression using 64-bit words."); + System.out.println("# Vo Ngoc Anh and Alistair Moffat. "); + System.out.println("# Index compression using 64-bit words."); System.out - .println("# Softw. Pract. Exper.40, 2 (February 2010), 131-147. "); + .println("# Softw. Pract. Exper.40, 2 (February 2010), 131-147. 
"); System.out.println(); PrintWriter writer = null; diff --git a/src/main/java/me/lemire/integercompression/differential/IntegratedBinaryPacking.java b/src/main/java/me/lemire/integercompression/differential/IntegratedBinaryPacking.java index 7e1c161..f50a367 100644 --- a/src/main/java/me/lemire/integercompression/differential/IntegratedBinaryPacking.java +++ b/src/main/java/me/lemire/integercompression/differential/IntegratedBinaryPacking.java @@ -49,7 +49,8 @@ public class IntegratedBinaryPacking implements IntegratedIntegerCODEC, SkippableIntegratedIntegerCODEC { - static final int BLOCK_SIZE = 32; + public static final int BLOCK_SIZE = 32; + private static final int MAX_BIT_WIDTH = Integer.SIZE; @Override public void compress(int[] in, IntWrapper inpos, int inlength, int[] out, @@ -170,4 +171,13 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, initvalue.set(initoffset); inpos.set(tmpinpos); } + + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + int blockCount = inlength / BLOCK_SIZE; + int headersSizeInInts = blockCount / Integer.BYTES + (blockCount % Integer.BYTES); + int blocksSizeInInts = blockCount * MAX_BIT_WIDTH; + compressedPositions.add(blockCount * BLOCK_SIZE); + return headersSizeInInts + blocksSizeInInts; + } } diff --git a/src/main/java/me/lemire/integercompression/differential/IntegratedIntCompressor.java b/src/main/java/me/lemire/integercompression/differential/IntegratedIntCompressor.java index 5808bdd..1d935c4 100644 --- a/src/main/java/me/lemire/integercompression/differential/IntegratedIntCompressor.java +++ b/src/main/java/me/lemire/integercompression/differential/IntegratedIntCompressor.java @@ -3,7 +3,6 @@ import java.util.Arrays; import me.lemire.integercompression.IntWrapper; -import me.lemire.integercompression.UncompressibleInputException; /** * This is a convenience class that wraps a codec to provide @@ -36,19 +35,14 @@ public IntegratedIntCompressor() { * 
* @param input array to be compressed * @return compressed array - * @throws UncompressibleInputException if the data is too poorly compressible */ public int[] compress(int[] input) { - int [] compressed = new int[input.length + input.length / 100 + 1024]; + int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), input.length); + int [] compressed = new int[maxCompressedLength + 1]; // +1 to store the length of the input compressed[0] = input.length; IntWrapper outpos = new IntWrapper(1); IntWrapper initvalue = new IntWrapper(0); - try { - codec.headlessCompress(input, new IntWrapper(0), input.length, compressed, outpos, initvalue); - } catch (IndexOutOfBoundsException ioebe) { - throw new UncompressibleInputException( - "Your input is too poorly compressible with the current codec : " + codec); - } + codec.headlessCompress(input, new IntWrapper(0), input.length, compressed, outpos, initvalue); compressed = Arrays.copyOf(compressed,outpos.intValue()); return compressed; } diff --git a/src/main/java/me/lemire/integercompression/differential/IntegratedVariableByte.java b/src/main/java/me/lemire/integercompression/differential/IntegratedVariableByte.java index 918a900..a577031 100644 --- a/src/main/java/me/lemire/integercompression/differential/IntegratedVariableByte.java +++ b/src/main/java/me/lemire/integercompression/differential/IntegratedVariableByte.java @@ -24,6 +24,8 @@ public class IntegratedVariableByte implements IntegratedIntegerCODEC, IntegratedByteIntegerCODEC, SkippableIntegratedIntegerCODEC { + private static final int MAX_BYTES_PER_INT = 5; + private static byte extract7bits(int i, long val) { return (byte)((val >> (7 * i)) & ((1 << 7) - 1)); } @@ -257,6 +259,14 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, inpos.set(p + (s!=0 ? 
1 : 0)); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + int maxLengthInBytes = inlength * MAX_BYTES_PER_INT; + int maxLengthInInts = (maxLengthInBytes + Integer.BYTES - 1) / Integer.BYTES; + compressedPositions.add(inlength); + return maxLengthInInts; + } + /** * Creates a new buffer of the requested size. * diff --git a/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedComposition.java b/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedComposition.java index 09c4dd8..4786ec5 100644 --- a/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedComposition.java +++ b/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedComposition.java @@ -66,14 +66,25 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, if (inlength == 0) return; int init = inpos.get(); + int outposInit = outpos.get(); + F1.headlessUncompress(in, inpos, inlength, out, outpos,num,initvalue); if (inpos.get() == init) { - inpos.increment(); + inpos.increment(); } inlength -= inpos.get() - init; - num -= outpos.get(); + num -= outpos.get() - outposInit; F2.headlessUncompress(in, inpos, inlength, out, outpos,num,initvalue); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + int init = compressedPositions.get(); + int maxLength = F1.maxHeadlessCompressedLength(compressedPositions, inlength); + maxLength += 1; // Add +1 for the potential F2 header. Question: is this header actually needed in the headless version? 
+ inlength -= compressedPositions.get() - init; + maxLength += F2.maxHeadlessCompressedLength(compressedPositions, inlength); + return maxLength; + } } diff --git a/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedIntegerCODEC.java b/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedIntegerCODEC.java index 8b7fd4b..e2df754 100644 --- a/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedIntegerCODEC.java +++ b/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedIntegerCODEC.java @@ -71,4 +71,21 @@ public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int[] out public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos, int num, IntWrapper initvalue); + /** + * Compute the maximum number of integers that might be required to store + * the compressed form of a given input array segment, without headers. + *

<p>
+ * This is useful to pre-allocate the output buffer before calling + * {@link #headlessCompress(int[], IntWrapper, int, int[], IntWrapper, IntWrapper)}. + *
</p>
+ * + * @param compressedPositions + * since not all schemes compress every input integer, this parameter + * returns how many input integers will actually be compressed. + * This is useful when composing multiple schemes. + * @param inlength + * number of integers to be compressed + * @return the maximum number of integers needed in the output array + */ + int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength); } diff --git a/src/main/java/me/lemire/integercompression/synth/UniformDataGenerator.java b/src/main/java/me/lemire/integercompression/synth/UniformDataGenerator.java index bbd386a..a50497c 100644 --- a/src/main/java/me/lemire/integercompression/synth/UniformDataGenerator.java +++ b/src/main/java/me/lemire/integercompression/synth/UniformDataGenerator.java @@ -42,7 +42,7 @@ int[] generateUniformHash(int N, int Max) { int[] ans = new int[N]; HashSet s = new HashSet(); while (s.size() < N) - s.add(new Integer(this.rand.nextInt(Max))); + s.add(this.rand.nextInt(Max)); Iterator i = s.iterator(); for (int k = 0; k < N; ++k) ans[k] = i.next().intValue(); diff --git a/src/main/java/me/lemire/integercompression/vector/VectorBitPacker.java b/src/main/java/me/lemire/integercompression/vector/VectorBitPacker.java new file mode 100644 index 0000000..9b2e1ca --- /dev/null +++ b/src/main/java/me/lemire/integercompression/vector/VectorBitPacker.java @@ -0,0 +1,12790 @@ +// Copyright (C) 2022 Intel Corporation + +// SPDX-License-Identifier: Apache-2.0 + +package me.lemire.integercompression.vector; + +import java.util.Arrays; +import jdk.incubator.vector.*; + +/** + * Vectorized bitpacking routines. This class is a version of the + * VectorBitPackerTerse class with fewer branch instructions. + * + * The code is machine generated from VectorBitPackerTerse.java using helper + * classes.
+ * + */ +public class VectorBitPacker { + private static final VectorSpecies SPECIES_512 = + IntVector.SPECIES_512; + private static final VectorSpecies SPECIES_256 = + IntVector.SPECIES_256; + private static final int VLEN_512 = 16; + private static final int VLEN_256 = 8; + private static final int BLOCK_SIZE = 256; + + private static final IntVector MASK_1 = + IntVector.broadcast(SPECIES_256, (1 << 1) - 1); + private static final IntVector MASK_2 = + IntVector.broadcast(SPECIES_512, (1 << 2) - 1); + private static final IntVector MASK_3 = + IntVector.broadcast(SPECIES_256, (1 << 3) - 1); + private static final IntVector MASK_4 = + IntVector.broadcast(SPECIES_512, (1 << 4) - 1); + private static final IntVector MASK_5 = + IntVector.broadcast(SPECIES_256, (1 << 5) - 1); + private static final IntVector MASK_6 = + IntVector.broadcast(SPECIES_512, (1 << 6) - 1); + private static final IntVector MASK_7 = + IntVector.broadcast(SPECIES_256, (1 << 7) - 1); + private static final IntVector MASK_8 = + IntVector.broadcast(SPECIES_512, (1 << 8) - 1); + private static final IntVector MASK_9 = + IntVector.broadcast(SPECIES_256, (1 << 9) - 1); + private static final IntVector MASK_10 = + IntVector.broadcast(SPECIES_512, (1 << 10) - 1); + private static final IntVector MASK_11 = + IntVector.broadcast(SPECIES_256, (1 << 11) - 1); + private static final IntVector MASK_12 = + IntVector.broadcast(SPECIES_512, (1 << 12) - 1); + private static final IntVector MASK_13 = + IntVector.broadcast(SPECIES_256, (1 << 13) - 1); + private static final IntVector MASK_14 = + IntVector.broadcast(SPECIES_512, (1 << 14) - 1); + private static final IntVector MASK_15 = + IntVector.broadcast(SPECIES_256, (1 << 15) - 1); + private static final IntVector MASK_16 = + IntVector.broadcast(SPECIES_512, (1 << 16) - 1); + private static final IntVector MASK_17 = + IntVector.broadcast(SPECIES_256, (1 << 17) - 1); + private static final IntVector MASK_18 = + IntVector.broadcast(SPECIES_512, (1 << 18) - 1); + 
private static final IntVector MASK_19 = + IntVector.broadcast(SPECIES_256, (1 << 19) - 1); + private static final IntVector MASK_20 = + IntVector.broadcast(SPECIES_512, (1 << 20) - 1); + private static final IntVector MASK_21 = + IntVector.broadcast(SPECIES_256, (1 << 21) - 1); + private static final IntVector MASK_22 = + IntVector.broadcast(SPECIES_512, (1 << 22) - 1); + private static final IntVector MASK_23 = + IntVector.broadcast(SPECIES_256, (1 << 23) - 1); + private static final IntVector MASK_24 = + IntVector.broadcast(SPECIES_512, (1 << 24) - 1); + private static final IntVector MASK_25 = + IntVector.broadcast(SPECIES_256, (1 << 25) - 1); + private static final IntVector MASK_26 = + IntVector.broadcast(SPECIES_512, (1 << 26) - 1); + private static final IntVector MASK_27 = + IntVector.broadcast(SPECIES_256, (1 << 27) - 1); + private static final IntVector MASK_28 = + IntVector.broadcast(SPECIES_512, (1 << 28) - 1); + private static final IntVector MASK_29 = + IntVector.broadcast(SPECIES_256, (1 << 29) - 1); + private static final IntVector MASK_30 = + IntVector.broadcast(SPECIES_512, (1 << 30) - 1); + private static final IntVector MASK_31 = + IntVector.broadcast(SPECIES_256, (1 << 31) - 1); + + /** + * Pack 32 integers + * + * @param in + * source array + * @param inpos + * position in source array + * @param out + * output array + * @param outpos + * position in output array + * @param b + * number of bits to use per integer + */ + public static void fastpack(final int[] in, int inpos, final int[] out, + int outpos, int b) { + switch (b) { + case 0: + break; + case 1: + fastpack1(in, inpos, out, outpos); + break; + case 2: + fastpack2(in, inpos, out, outpos); + break; + case 3: + fastpack3(in, inpos, out, outpos); + break; + case 4: + fastpack4(in, inpos, out, outpos); + break; + case 5: + fastpack5(in, inpos, out, outpos); + break; + case 6: + fastpack6(in, inpos, out, outpos); + break; + case 7: + fastpack7(in, inpos, out, outpos); + break; + case 8: + 
fastpack8(in, inpos, out, outpos); + break; + case 9: + fastpack9(in, inpos, out, outpos); + break; + case 10: + fastpack10(in, inpos, out, outpos); + break; + case 11: + fastpack11(in, inpos, out, outpos); + break; + case 12: + fastpack12(in, inpos, out, outpos); + break; + case 13: + fastpack13(in, inpos, out, outpos); + break; + case 14: + fastpack14(in, inpos, out, outpos); + break; + case 15: + fastpack15(in, inpos, out, outpos); + break; + case 16: + fastpack16(in, inpos, out, outpos); + break; + case 17: + fastpack17(in, inpos, out, outpos); + break; + case 18: + fastpack18(in, inpos, out, outpos); + break; + case 19: + fastpack19(in, inpos, out, outpos); + break; + case 20: + fastpack20(in, inpos, out, outpos); + break; + case 21: + fastpack21(in, inpos, out, outpos); + break; + case 22: + fastpack22(in, inpos, out, outpos); + break; + case 23: + fastpack23(in, inpos, out, outpos); + break; + case 24: + fastpack24(in, inpos, out, outpos); + break; + case 25: + fastpack25(in, inpos, out, outpos); + break; + case 26: + fastpack26(in, inpos, out, outpos); + break; + case 27: + fastpack27(in, inpos, out, outpos); + break; + case 28: + fastpack28(in, inpos, out, outpos); + break; + case 29: + fastpack29(in, inpos, out, outpos); + break; + case 30: + fastpack30(in, inpos, out, outpos); + break; + case 31: + fastpack31(in, inpos, out, outpos); + break; + case 32: + System.arraycopy(in, inpos, out, outpos, BLOCK_SIZE); + break; + } + } + + static void fastpackNoMask(final int[] in, int inpos, final int[] out, + int outpos, int b) { + switch (b) { + case 0: + break; + case 1: + fastpackNoMask1(in, inpos, out, outpos); + break; + case 2: + fastpackNoMask2(in, inpos, out, outpos); + break; + case 3: + fastpackNoMask3(in, inpos, out, outpos); + break; + case 4: + fastpackNoMask4(in, inpos, out, outpos); + break; + case 5: + fastpackNoMask5(in, inpos, out, outpos); + break; + case 6: + fastpackNoMask6(in, inpos, out, outpos); + break; + case 7: + fastpackNoMask7(in, 
inpos, out, outpos); + break; + case 8: + fastpackNoMask8(in, inpos, out, outpos); + break; + case 9: + fastpackNoMask9(in, inpos, out, outpos); + break; + case 10: + fastpackNoMask10(in, inpos, out, outpos); + break; + case 11: + fastpackNoMask11(in, inpos, out, outpos); + break; + case 12: + fastpackNoMask12(in, inpos, out, outpos); + break; + case 13: + fastpackNoMask13(in, inpos, out, outpos); + break; + case 14: + fastpackNoMask14(in, inpos, out, outpos); + break; + case 15: + fastpackNoMask15(in, inpos, out, outpos); + break; + case 16: + fastpackNoMask16(in, inpos, out, outpos); + break; + case 17: + fastpackNoMask17(in, inpos, out, outpos); + break; + case 18: + fastpackNoMask18(in, inpos, out, outpos); + break; + case 19: + fastpackNoMask19(in, inpos, out, outpos); + break; + case 20: + fastpackNoMask20(in, inpos, out, outpos); + break; + case 21: + fastpackNoMask21(in, inpos, out, outpos); + break; + case 22: + fastpackNoMask22(in, inpos, out, outpos); + break; + case 23: + fastpackNoMask23(in, inpos, out, outpos); + break; + case 24: + fastpackNoMask24(in, inpos, out, outpos); + break; + case 25: + fastpackNoMask25(in, inpos, out, outpos); + break; + case 26: + fastpackNoMask26(in, inpos, out, outpos); + break; + case 27: + fastpackNoMask27(in, inpos, out, outpos); + break; + case 28: + fastpackNoMask28(in, inpos, out, outpos); + break; + case 29: + fastpackNoMask29(in, inpos, out, outpos); + break; + case 30: + fastpackNoMask30(in, inpos, out, outpos); + break; + case 31: + fastpackNoMask31(in, inpos, out, outpos); + break; + case 32: + System.arraycopy(in, inpos, out, outpos, BLOCK_SIZE); + break; + } + } + + /** + * Unpack 32 integers + * + * @param in + * source array + * @param inpos + * position in source array + * @param out + * output array + * @param outpos + * position in output array + * @param b + * number of bits to use per integer + */ + public static void fastunpack(final int[] in, int inpos, final int[] out, + int outpos, int b) { + 
switch (b) { + case 0: + Arrays.fill(out, outpos, outpos + 256, 0); + break; + case 1: + fastunpack1(in, inpos, out, outpos); + break; + case 2: + fastunpack2(in, inpos, out, outpos); + break; + case 3: + fastunpack3(in, inpos, out, outpos); + break; + case 4: + fastunpack4(in, inpos, out, outpos); + break; + case 5: + fastunpack5(in, inpos, out, outpos); + break; + case 6: + fastunpack6(in, inpos, out, outpos); + break; + case 7: + fastunpack7(in, inpos, out, outpos); + break; + case 8: + fastunpack8(in, inpos, out, outpos); + break; + case 9: + fastunpack9(in, inpos, out, outpos); + break; + case 10: + fastunpack10(in, inpos, out, outpos); + break; + case 11: + fastunpack11(in, inpos, out, outpos); + break; + case 12: + fastunpack12(in, inpos, out, outpos); + break; + case 13: + fastunpack13(in, inpos, out, outpos); + break; + case 14: + fastunpack14(in, inpos, out, outpos); + break; + case 15: + fastunpack15(in, inpos, out, outpos); + break; + case 16: + fastunpack16(in, inpos, out, outpos); + break; + case 17: + fastunpack17(in, inpos, out, outpos); + break; + case 18: + fastunpack18(in, inpos, out, outpos); + break; + case 19: + fastunpack19(in, inpos, out, outpos); + break; + case 20: + fastunpack20(in, inpos, out, outpos); + break; + case 21: + fastunpack21(in, inpos, out, outpos); + break; + case 22: + fastunpack22(in, inpos, out, outpos); + break; + case 23: + fastunpack23(in, inpos, out, outpos); + break; + case 24: + fastunpack24(in, inpos, out, outpos); + break; + case 25: + fastunpack25(in, inpos, out, outpos); + break; + case 26: + fastunpack26(in, inpos, out, outpos); + break; + case 27: + fastunpack27(in, inpos, out, outpos); + break; + case 28: + fastunpack28(in, inpos, out, outpos); + break; + case 29: + fastunpack29(in, inpos, out, outpos); + break; + case 30: + fastunpack30(in, inpos, out, outpos); + break; + case 31: + fastunpack31(in, inpos, out, outpos); + break; + case 32: + System.arraycopy(in, inpos, out, outpos, BLOCK_SIZE); + break; + } 
+ } + + public static int slowpack(final int[] in, int inpos, int inlen, + final int[] out, int outpos, int b) { + if (inlen == 0) + return outpos; + if (b == 32) { + System.arraycopy(in, inpos, out, outpos, inlen); + return outpos + inlen; + } + int mask = (1 << b) - 1; + int c = 0; + int l = 0; + int r = 0; + int val = 0; + for (int i = 0; i < inlen; i++) { + val = in[inpos + i] & mask; + out[outpos] |= val << (c + r); + c += b; + l = (32 - r) % b; + if (c + r >= 32) { + if (i < inlen - 1 || l != 0) + outpos++; + r = l == 0 ? 0 : b - l; + if (l != 0) + out[outpos] = val >> (b - r); + c = 0; + } + } + return outpos; + } + + public static int slowunpack(final int[] in, int inpos, final int[] out, + int outpos, int outlen, int b) { + if (outlen == 0) { + return inpos; + } + if (b == 32) { + System.arraycopy(in, inpos, out, outpos, outlen); + return inpos + outlen; + } + int mask = (1 << b) - 1; + int limit = outpos + outlen; + int r = 0; + int val = 0; + int i = 0; + for (; outpos < limit; i++) { + if (r > 0) + out[outpos++] = + (val >>> (32 - (b - r))) | ((in[inpos + i] << (b - r)) & mask); + val = in[inpos + i]; + int j = 0; + int l = 32 - r; + int ll = l % b == 0 ? l : l - b; + while (j < ll && outpos < limit) { + out[outpos++] = (val >> (j + r)) & mask; + j += b; + } + r = l % b == 0 ? 0 : b - (l % b); + } + return inpos + i; + } + + public static int numCompressedInts(int n, int b) { + int width = b % 2 == 0 ? 
VLEN_512 : VLEN_256; + if (n <= width) + return n; + int intsPerVec = (32 / b) * width; + int q = (n + intsPerVec - 1) / intsPerVec; + return q * width; + } + + private static void fastpack1(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_1); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV 
= iV.and(MASK_1).lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 17).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 19).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 21).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 22).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 23).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 25).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 26).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 27).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 28).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 29).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV 
= iV.and(MASK_1).lanewise(VectorOperators.LSHL, 30).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack2(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(MASK_2); + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 22).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 
26).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 28).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack3(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_3); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 21).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 27).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 
96); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 19).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 22).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 25).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 28).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = 
iV.and(MASK_3).lanewise(VectorOperators.LSHL, 17).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 23).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 26).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack4(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(MASK_4); + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(MASK_4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = 
iV.and(MASK_4).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack5(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_5); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 25).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV 
= IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 23).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 21).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 26).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 19).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 17).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 22).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack6(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(MASK_6); + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = 
iV.and(MASK_6).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 22).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack7(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_7); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 21).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 17).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + 
oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 23).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 19).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 22).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = 
iV.and(MASK_7).lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack8(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(MASK_8); + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(MASK_8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(MASK_8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(MASK_8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack9(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_9); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 22).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 17).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 21).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + 
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 19).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack10(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(MASK_10); + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = 
IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 
240); + oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack11(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_11); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 25).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 17).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += 
VLEN_256; + + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 19).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack12(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(MASK_12); + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 4).or(oV); 
+ + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(MASK_12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack13(final int[] in, int inpos, final 
int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_13); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = 
iV.and(MASK_13).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 17).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 
11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack14(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(MASK_14); + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = 
iV.and(MASK_14).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 
4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack15(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_15); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = 
iV.and(MASK_15).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 
27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack16(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = 
IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(MASK_16); + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(MASK_16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(MASK_16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(MASK_16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(MASK_16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(MASK_16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(MASK_16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.and(MASK_16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + } + + /** Bit-packs 32 vectors at 17 bits per lane value. Loads SPECIES_256 vectors from {@code in} at inpos, inpos + 8, ..., inpos + 248, ANDs each with MASK_17 and ORs it into an accumulator at its running bit offset; 17 accumulators are stored to {@code out}, the first at outpos and each later one VLEN_256 further on. NOTE(review): MASK_17, SPECIES_256 and VLEN_256 are declared outside this chunk - presumably a low-17-bit mask and an 8-lane int species; confirm there. */ + private static void fastpack17(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_17); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + /* LSHL 17 left room for only the low 15 bits of this input in the stored word; LSHR 15 re-extracts its remaining top bits to seed the next output vector. The same carry step follows every store below. */ + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 1); + + /* The carry from LSHR 1 occupies the low 16 bits, so this word takes just one more input, shifted to bit 16, before it is stored. */ + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = 
iV.and(MASK_17).lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 
30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + } + + /** Bit-packs 16 vectors at 18 bits per lane value. Loads SPECIES_512 vectors from {@code in} at inpos, inpos + 16, ..., inpos + 240, ANDs each with MASK_18 and ORs it into an accumulator at its running bit offset; 9 accumulators are stored to {@code out}, the first at outpos and each later one VLEN_512 further on. NOTE(review): MASK_18, SPECIES_512 and VLEN_512 are declared outside this chunk - presumably a low-18-bit mask and a 16-lane int species; confirm there. */ + private static void fastpack18(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(MASK_18); + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + /* LSHL 18 left room for only the low 14 bits of this input in the stored word; LSHR 14 re-extracts its remaining top bits to seed the next output vector. The same carry step follows every store below. */ + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = 
iV.and(MASK_18).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + } + + /** Bit-packs 32 vectors at 19 bits per lane value. Loads SPECIES_256 vectors from {@code in} at inpos, inpos + 8, ..., inpos + 248, ANDs each with MASK_19 and ORs it into an accumulator at its running bit offset; 19 accumulators are stored to {@code out}, the first at outpos and each later one VLEN_256 further on. NOTE(review): MASK_19, SPECIES_256 and VLEN_256 are declared outside this chunk - presumably a low-19-bit mask and an 8-lane int species; confirm there. */ + private static void fastpack19(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_19); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + /* LSHL 19 left room for only the low 13 bits of this input in the stored word; LSHR 13 re-extracts its remaining top bits to seed the next output vector. The same carry step follows every store below. */ + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, 
outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = 
iV.and(MASK_19).lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 11); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + } + + /** Bit-packs 16 vectors at 20 bits per lane value. Loads SPECIES_512 vectors from {@code in} at inpos, inpos + 16, ..., inpos + 240, ANDs each with MASK_20 and ORs it into an accumulator at its running bit offset; 10 accumulators are stored to {@code out}, the first at outpos and each later one VLEN_512 further on. The shift schedule repeats after 8 inputs (8 x 20 bits fill 5 words exactly), so the accumulator restarts from zero before the input at inpos + 128. NOTE(review): MASK_20, SPECIES_512 and VLEN_512 are declared outside this chunk - presumably a low-20-bit mask and a 16-lane int species; confirm there. */ + private static void fastpack20(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(MASK_20); + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + /* LSHL 20 left room for only the low 12 bits of this input in the stored word; LSHR 12 re-extracts its remaining top bits to seed the next output vector. The same carry step follows every store that ends mid-value. */ + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = 
iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + /* The store above closed a word exactly (12 carried bits + 20 new bits), so nothing carries over and the accumulator restarts from all zeros. zero is a static factory, so it is called on IntVector rather than through the oV instance. */ + oV = IntVector.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(MASK_20).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = 
iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + } + + /** Bit-packs 32 vectors at 21 bits per lane value. Loads SPECIES_256 vectors from {@code in} at inpos, inpos + 8, ..., inpos + 248, ANDs each with MASK_21 and ORs it into an accumulator at its running bit offset; 21 accumulators are stored to {@code out}, the first at outpos and each later one VLEN_256 further on. NOTE(review): MASK_21, SPECIES_256 and VLEN_256 are declared outside this chunk - presumably a low-21-bit mask and an 8-lane int species; confirm there. */ + private static void fastpack21(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_21); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + /* LSHL 21 left room for only the low 11 bits of this input in the stored word; LSHR 11 re-extracts its remaining top bits to seed the next output vector. The same carry step follows every store below. */ + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, 
in, inpos + 56); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 
16); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 
216); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + } + + /** Bit-packs 16 vectors at 22 bits per lane value. Loads SPECIES_512 vectors from {@code in} at inpos, inpos + 16, ..., inpos + 240, ANDs each with MASK_22 and ORs it into an accumulator at its running bit offset; 11 accumulators are stored to {@code out}, the first at outpos and each later one VLEN_512 further on. NOTE(review): MASK_22, SPECIES_512 and VLEN_512 are declared outside this chunk - presumably a low-22-bit mask and a 16-lane int species; confirm there. */ + private static void fastpack22(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(MASK_22); + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + /* LSHL 22 left room for only the low 10 bits of this input in the stored word; LSHR 10 re-extracts its remaining top bits to seed the next output vector. The same carry step follows every store below. */ + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 8); + + iV = 
IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = 
iV.and(MASK_22).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + } + + /** Bit-packs 32 vectors at 23 bits per lane value. Loads SPECIES_256 vectors from {@code in} at inpos, inpos + 8, ..., inpos + 248, ANDs each with MASK_23 and ORs it into an accumulator at its running bit offset; 23 accumulators are stored to {@code out}, the first at outpos and each later one VLEN_256 further on. NOTE(review): MASK_23, SPECIES_256 and VLEN_256 are declared outside this chunk - presumably a low-23-bit mask and an 8-lane int species; confirm there. */ + private static void fastpack23(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_23); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + /* LSHL 23 left room for only the low 9 bits of this input in the stored word; LSHR 9 re-extracts its remaining top bits to seed the next output vector. The same carry step follows every store below. */ + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 22); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += 
VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 
30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV 
= iV.and(MASK_23).lanewise(VectorOperators.LSHR, 19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + } + /** Packs 24-bit values lane-wise: loads 16 SPECIES_512 vectors from {@code in} at offsets {@code inpos}, {@code inpos + 16}, ..., {@code inpos + 240}, ANDs each with MASK_24, and ORs the shifted pieces into 12 vectors stored to {@code out} starting at {@code outpos}. Bits of an input that do not fit the current output vector (LSHL) are carried into the next one (LSHR on the same input). {@code outpos} advances by VLEN_512 after every store except the last. NOTE(review): presumably SPECIES_512 has 16 int lanes, VLEN_512 == 16 and MASK_24 keeps the low 24 bits; all are declared outside this view -- confirm. @param in source values @param inpos index in {@code in} of the first value read @param out destination of the packed words @param outpos index in {@code out} of the first word written */ + private static void fastpack24(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(MASK_24); + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + /* The vector just stored was filled exactly (8 carried bits + 24 new bits), so nothing is carried over and the next output vector restarts from zero. NOTE(review): zero(...) appears to be the static IntVector factory invoked through an instance -- confirm. */ oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(MASK_24).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = 
iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + /* Output vector filled exactly again: restart from zero. */ oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(MASK_24).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + /* Output vector filled exactly again: restart from zero. */ oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(MASK_24).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, 
outpos); + } + /** Packs 25-bit values lane-wise: loads 32 SPECIES_256 vectors from {@code in} at offsets {@code inpos}, {@code inpos + 8}, ..., {@code inpos + 248}, ANDs each with MASK_25, and ORs the shifted pieces into 25 vectors stored to {@code out} starting at {@code outpos}. Bits of an input that do not fit the current output vector (LSHL) are carried into the next one (LSHR on the same input); where an input fits entirely, two loads are merged before the store. {@code outpos} advances by VLEN_256 after every store except the last. NOTE(review): presumably SPECIES_256 has 8 int lanes, VLEN_256 == 8 and MASK_25 keeps the low 25 bits; all are declared outside this view -- confirm. @param in source values @param inpos index in {@code in} of the first value read @param out destination of the packed words @param outpos index in {@code out} of the first word written */ + private static void fastpack25(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_25); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 24); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = 
iV.and(MASK_25).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 22); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + } + /** Packs 26-bit values lane-wise: loads 16 SPECIES_512 vectors from {@code in} at offsets {@code inpos}, {@code inpos + 16}, ..., {@code inpos + 240}, ANDs each with MASK_26, and ORs the shifted pieces into 13 vectors stored to {@code out} starting at {@code outpos}. Bits of an input that do not fit the current output vector (LSHL) are carried into the next one (LSHR on the same input); where an input fits entirely, two loads are merged before the store. {@code outpos} advances by VLEN_512 after every store except the last. NOTE(review): presumably SPECIES_512 has 16 int lanes, VLEN_512 == 16 and MASK_26 keeps the low 26 bits; all are declared outside this view -- confirm. @param in source values @param inpos index in {@code in} of the first value read @param out destination of the packed words @param outpos index in {@code out} of the first word written */ + private static void fastpack26(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(MASK_26); + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 24); 
+ + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 22); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = 
iV.and(MASK_26).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + } + /** Packs 27-bit values lane-wise: loads 32 SPECIES_256 vectors from {@code in} at offsets {@code inpos}, {@code inpos + 8}, ..., {@code inpos + 248}, ANDs each with MASK_27, and ORs the shifted pieces into 27 vectors stored to {@code out} starting at {@code outpos}. Bits of an input that do not fit the current output vector (LSHL) are carried into the next one (LSHR on the same input); where an input fits entirely, two loads are merged before the store. {@code outpos} advances by VLEN_256 after every store except the last. NOTE(review): presumably SPECIES_256 has 8 int lanes, VLEN_256 == 8 and MASK_27 keeps the low 27 bits; all are declared outside this view -- confirm. @param in source values @param inpos index in {@code in} of the first value read @param out destination of the packed words @param outpos index in {@code out} of the first word written */ + private static void fastpack27(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_27); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += 
VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 16).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 26); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 24); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = 
iV.and(MASK_27).lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 22); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + } + /** Packs 28-bit values lane-wise: loads 16 SPECIES_512 vectors from {@code in} at offsets {@code inpos}, {@code inpos + 16}, ..., {@code inpos + 240}, ANDs each with MASK_28, and ORs the shifted pieces into 14 vectors stored to {@code out} starting at {@code outpos}. Bits of an input that do not fit the current output vector (LSHL) are carried into the next one (LSHR on the same input). The same shift sequence runs twice, for offsets 0..112 and 128..240. {@code outpos} advances by VLEN_512 after every store except the last. NOTE(review): presumably SPECIES_512 has 16 int lanes, VLEN_512 == 16 and MASK_28 keeps the low 28 bits; all are declared outside this view -- confirm. @param in source values @param inpos index in {@code in} of the first value read @param out destination of the packed words @param outpos index in {@code out} of the first word written */ + private static void fastpack28(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(MASK_28); + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += 
VLEN_512; + + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 24); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + /* The vector just stored was filled exactly (4 carried bits + 28 new bits), so nothing is carried over and the next output vector restarts from zero. NOTE(review): zero(...) appears to be the static IntVector factory invoked through an instance -- confirm. */ oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(MASK_28).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = 
iV.and(MASK_28).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 24); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + } + /** Packs 29-bit values lane-wise: loads 32 SPECIES_256 vectors from {@code in} at offsets {@code inpos}, {@code inpos + 8}, ..., {@code inpos + 248}, ANDs each with MASK_29, and ORs the shifted pieces into 29 vectors stored to {@code out} starting at {@code outpos}. Bits of an input that do not fit the current output vector (LSHL) are carried into the next one (LSHR on the same input); where an input fits entirely, two loads are merged before the store. {@code outpos} advances by VLEN_256 after every store except the last. NOTE(review): presumably SPECIES_256 has 8 int lanes, VLEN_256 == 8 and MASK_29 keeps the low 29 bits; all are declared outside this view -- confirm. @param in source values @param inpos index in {@code in} of the first value read @param out destination of the packed words @param outpos index in {@code out} of the first word written */ + private static void fastpack29(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_29); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; 
+ + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 24); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 25).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 22); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 28); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = 
iV.and(MASK_29).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, 
outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 26); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + } + /** Packs 30-bit values lane-wise: loads 16 SPECIES_512 vectors from {@code in} at offsets {@code inpos}, {@code inpos + 16}, ..., {@code inpos + 240}, ANDs each with MASK_30, and ORs the shifted pieces into 15 vectors stored to {@code out} starting at {@code outpos}. Every input after the first is split: its low bits fill the current output vector (LSHL) and the remaining high bits are carried into the next one (LSHR on the same input). {@code outpos} advances by VLEN_512 after every store except the last. NOTE(review): presumably SPECIES_512 has 16 int lanes, VLEN_512 == 16 and MASK_30 keeps the low 30 bits; all are declared outside this view -- confirm. @param in source values @param inpos index in {@code in} of the first value read @param out destination of the packed words @param outpos index in {@code out} of the first word written */ + private static void fastpack30(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(MASK_30); + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 18).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 22); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 24); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 26); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 28); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpack31(final int[] in, 
int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(MASK_31); + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = 
iV.and(MASK_31).lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 16); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 22); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 24); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = 
iV.and(MASK_31).lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 26); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 28); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 30); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask1(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = 
iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, 
inpos + 168); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask2(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_512, 
in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask3(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask4(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask5(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = 
iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = 
iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask6(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = 
iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask7(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = 
iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = 
iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask8(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = 
iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask9(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask10(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += 
VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask11(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = 
iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = 
iV.lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask12(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 
28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask13(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = 
iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + 
iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask14(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + 
oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask15(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 
152); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = 
iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask16(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 
16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask17(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 7); + 
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, 
inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask18(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.lanewise(VectorOperators.LSHR, 14); 
+ + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, 
inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + } + + /** Bit-packs 32 SPECIES_256 vectors, loaded from {@code in} at {@code inpos}, inpos+8, ..., inpos+248, into 19 vectors stored to {@code out} starting at {@code outpos} and advancing by VLEN_256 after each store but the last. Each input is OR-ed in at a bit offset that grows by 19 per input (mod 32); when an input crosses bit 32 the word is stored and that input's remaining high bits, obtained with a logical right shift, seed the next word. Inputs are never masked, so any bit set above the low 19 of a lane is OR-ed into the neighbouring fields. NOTE(review): the stride of 8 presumes SPECIES_256 has 8 int lanes and VLEN_256 == 8 - confirm against their declarations. */ private static void fastpackNoMask19(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + iV 
= IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = 
iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + } + + /** Bit-packs 16 SPECIES_512 vectors, loaded from {@code in} at {@code inpos}, inpos+16, ..., inpos+240, into 10 vectors stored to {@code out} starting at {@code outpos} and advancing by VLEN_512 after each store but the last. Each input is OR-ed in at a bit offset that grows by 20 per input (mod 32); when an input crosses bit 32 the word is stored and that input's remaining high bits, obtained with a logical right shift, seed the next word. The shift sequence repeats after 8 inputs. Inputs are never masked, so any bit set above the low 20 of a lane is OR-ed into the neighbouring fields. NOTE(review): the stride of 16 presumes SPECIES_512 has 16 int lanes and VLEN_512 == 16 - confirm against their declarations. */ private static void fastpackNoMask20(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + iV = 
IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + /* The word just stored ended exactly at bit 32 (12 + 20), so no bits carry over and the accumulator restarts from zero. NOTE(review): zero(...) looks like a static factory invoked through an instance - confirm and consider IntVector.zero(SPECIES_512). */ oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + } + + /** Bit-packs 32 SPECIES_256 vectors, loaded from {@code in} at {@code inpos}, inpos+8, ..., inpos+248, into 21 vectors stored to {@code out} starting at {@code outpos} and advancing by VLEN_256 after each store but the last. Each input is OR-ed in at a bit offset that grows by 21 per input (mod 32); when an input crosses bit 32 the word is stored and that input's remaining high bits, obtained with a logical right shift, seed the next word. Inputs are never masked, so any bit set above the low 21 of a lane is OR-ed into the neighbouring fields. NOTE(review): the stride of 8 presumes SPECIES_256 has 8 int lanes and VLEN_256 == 8 - confirm against their declarations. */ private static void fastpackNoMask21(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = 
iV.lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV 
= iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + } + + /** Bit-packs 16 SPECIES_512 vectors, loaded from {@code in} at {@code inpos}, inpos+16, ..., inpos+240, into 11 vectors stored to {@code out} starting at {@code outpos} and advancing by VLEN_512 after each store but the last. Each input is OR-ed in at a bit offset that grows by 22 per input (mod 32); when an input crosses bit 32 the word is stored and that input's remaining high bits, obtained with a logical right shift, seed the next word. Inputs are never masked, so any bit set above the low 22 of a lane is OR-ed into the neighbouring fields. NOTE(review): the stride of 16 presumes SPECIES_512 has 16 int lanes and VLEN_512 == 16 - confirm against their declarations. */ private static void fastpackNoMask22(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = 
IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + } + + /** Bit-packs 32 SPECIES_256 vectors, loaded from {@code in} at {@code inpos}, inpos+8, ..., inpos+248, into 23 vectors stored to {@code out} starting at {@code outpos} and advancing by VLEN_256 after each store but the last. Each input is OR-ed in at a bit offset that grows by 23 per input (mod 32); when an input crosses bit 32 the word is stored and that input's remaining high bits, obtained with a logical right shift, seed the next word. Inputs are never masked, so any bit set above the low 23 of a lane is OR-ed into the neighbouring fields. NOTE(review): the stride of 8 presumes SPECIES_256 has 8 int lanes and VLEN_256 == 8 - confirm against their declarations. */ private static void fastpackNoMask23(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = 
iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 22); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = 
iV.lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + } + + /** Bit-packs 16 SPECIES_512 vectors, loaded from {@code in} at {@code inpos}, inpos+16, ..., inpos+240, into 12 vectors stored to {@code out} starting at {@code outpos} and advancing by VLEN_512 after each store but the last. Each input is OR-ed in at a bit offset that grows by 24 per input (mod 32); when an input crosses bit 32 the word is stored and that input's remaining high bits, obtained with a logical right shift, seed the next word. The shift sequence repeats after 4 inputs (3 stored words). Inputs are never masked, so any bit set above the low 24 of a lane is OR-ed into the neighbouring fields. NOTE(review): the stride of 16 presumes SPECIES_512 has 16 int lanes and VLEN_512 == 16 - confirm against their declarations. */ private static void fastpackNoMask24(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + /* The word just stored ended exactly at bit 32 (8 + 24), so no bits carry over and the accumulator restarts from zero; the same happens after every fourth input. NOTE(review): zero(...) looks like a static factory invoked through an instance - confirm and consider IntVector.zero(SPECIES_512). */ oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); 
+ oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + } + + 
/** Bit-packs 32 SPECIES_256 vectors, loaded from {@code in} at {@code inpos}, inpos+8, ..., inpos+248, into 25 vectors stored to {@code out} starting at {@code outpos} and advancing by VLEN_256 after each store but the last. Each input is OR-ed in at a bit offset that grows by 25 per input (mod 32); when an input crosses bit 32 the word is stored and that input's remaining high bits, obtained with a logical right shift, seed the next word. Inputs are never masked, so any bit set above the low 25 of a lane is OR-ed into the neighbouring fields. NOTE(review): the stride of 8 presumes SPECIES_256 has 8 int lanes and VLEN_256 == 8 - confirm against their declarations. */ private static void fastpackNoMask25(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = 
iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 22); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + } + + private static void fastpackNoMask26(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = 
iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 22); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + } + + /** Packs 32 SPECIES_256 input vectors (loaded from in at inpos, inpos + 8, ..., inpos + 248) into 27 output vectors stored to out beginning at outpos; outpos is advanced by VLEN_256 after every store except the last. Each input is shifted left to the current bit offset and OR-ed into the pending output vector; after a store, the bits of that input that did not fit are carried into the next output vector with a logical right shift. Where an input ends inside the pending vector, the following input is merged before the store. No mask is applied here, so inputs are presumably expected to already fit in 27 bits (assumption; verify against callers). @param in source array @param inpos index in in of the first lane read @param out destination array @param outpos index in out of the first lane written */ private static void fastpackNoMask27(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + 
outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 1); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 26); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = 
iV.lanewise(VectorOperators.LSHR, 19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 22); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + } + + /** Packs 16 SPECIES_512 input vectors (loaded from in at inpos, inpos + 16, ..., inpos + 240) into 14 output vectors stored to out beginning at outpos; outpos is advanced by VLEN_512 after every store except the last. Each input is shifted left to the current bit offset and OR-ed into the pending output vector; after a store, the bits of that input that did not fit are carried into the next output vector with a logical right shift. The same shift schedule (28, 24, ..., 4) is applied twice, once for the inputs at inpos .. inpos + 112 and once for those at inpos + 128 .. inpos + 240. No mask is applied here, so inputs are presumably expected to already fit in 28 bits (assumption; verify against callers). @param in source array @param inpos index in in of the first lane read @param out destination array @param outpos index in out of the first lane written */ private static void fastpackNoMask28(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = 
IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + /* nothing is carried over at this point: the pending output vector is reset to all zeros (zero is a static IntVector factory, here invoked through the oV reference) and the input at inpos + 128 is then OR-ed in unshifted */ oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.or(oV); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = 
IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + } + + /** Packs 32 SPECIES_256 input vectors (loaded from in at inpos, inpos + 8, ..., inpos + 248) into 29 output vectors stored to out beginning at outpos; outpos is advanced by VLEN_256 after every store except the last. Each input is shifted left to the current bit offset and OR-ed into the pending output vector; after a store, the bits of that input that did not fit are carried into the next output vector with a logical right shift. Where an input ends inside the pending vector, the following input is merged before the store. No mask is applied here, so inputs are presumably expected to already fit in 29 bits (assumption; verify against callers). @param in source array @param inpos index in in of the first lane read @param out destination array @param outpos index in out of the first lane written */ private static void fastpackNoMask29(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = 
iV.lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + 
outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 22); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 28); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); 
+ + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 26); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + } + + /** Packs 16 SPECIES_512 input vectors (loaded from in at inpos, inpos + 16, ..., inpos + 240) into 15 output vectors stored to out beginning at outpos; outpos is advanced by VLEN_512 after every store except the last. Each input after the first is shifted left by 30, 28, ..., 2 and OR-ed into the pending output vector, which is stored immediately; the bits of that input that did not fit are carried into the next output vector with a logical right shift of 2, 4, ..., 28. No mask is applied here, so inputs are presumably expected to already fit in 30 bits (assumption; verify against callers). @param in source array @param inpos index in in of the first lane read @param out destination array @param outpos index in out of the first lane written */ private static void fastpackNoMask30(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = 
iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 22); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + 
outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 26); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 28); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + } + + /** Packs 32 SPECIES_256 input vectors (loaded from in at inpos, inpos + 8, ..., inpos + 248) into 31 output vectors stored to out beginning at outpos; outpos is advanced by VLEN_256 after every store except the last. Each input after the first is shifted left by 31, 30, ..., 1 and OR-ed into the pending output vector, which is stored immediately; the bits of that input that did not fit are carried into the next output vector with a logical right shift of 1, 2, ..., 30. No mask is applied here, so inputs are presumably expected to already fit in 31 bits (assumption; verify against callers). @param in source array @param inpos index in in of the first lane read @param out destination array @param outpos index in out of the first lane written */ private static void fastpackNoMask31(final int[] in, int inpos, + final int[] out, int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 1); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 6); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 12); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = 
iV.lanewise(VectorOperators.LSHR, 15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 16); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 22); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 26); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 28); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 30); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); + oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + } + + /** Unpacks 1-bit fields: loads a single SPECIES_256 vector from in at inpos and stores 32 vectors to out beginning at outpos, advancing outpos by VLEN_256 after each store. Store k (k = 0..31) is the input logically shifted right by k and AND-ed with MASK_1; the first store skips the shift. MASK_1 is declared outside this view, presumably a 1-bit mask (assumption; confirm at its declaration). @param in packed source array @param inpos index in in of the first lane read @param out destination array @param outpos index in out of the first lane written */ private static void fastunpack1(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + iV.and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 3).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_1).intoArray(out, 
outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 5).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 7).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 9).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 11).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 13).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 14).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 15).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 17).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 18).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 19).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 20).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 21).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 22).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 23).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 24).and(MASK_1).intoArray(out, 
outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 25).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 26).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 27).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 28).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 29).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 30).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 31).and(MASK_1).intoArray(out, outpos); + outpos += VLEN_256; + } + + /** Unpacks 2-bit fields: loads a single SPECIES_512 vector from in at inpos and stores 16 vectors to out beginning at outpos, advancing outpos by VLEN_512 after each store. Store k (k = 0..15) is the input logically shifted right by 2 * k and AND-ed with MASK_2; the first store skips the shift. MASK_2 is declared outside this view, presumably a 2-bit mask (assumption; confirm at its declaration). @param in packed source array @param inpos index in in of the first lane read @param out destination array @param outpos index in out of the first lane written */ private static void fastunpack2(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + iV.and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 14).and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 18).and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 20).and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 
22).and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 24).and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 26).and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 28).and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 30).and(MASK_2).intoArray(out, outpos); + outpos += VLEN_512; + } + + /** Unpacks 3-bit fields: loads three SPECIES_256 vectors from in (at inpos, inpos + 8 and inpos + 16) and stores 32 vectors to out beginning at outpos, advancing outpos by VLEN_256 after each store. Most stores are the current input logically shifted right and AND-ed with MASK_3; two values straddle consecutive inputs and are assembled from the high bits of one input and the low bits of the next. MASK_3 is declared outside this view, presumably a 3-bit mask (assumption; confirm at its declaration). @param in packed source array @param inpos index in in of the first lane read @param out destination array @param outpos index in out of the first lane written */ private static void fastunpack3(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + iV.and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 3).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 9).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 15).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 18).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 21).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 24).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 27).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + /* straddling value: what remains of the first input after a shift by 30, combined with the lowest bit of the input at inpos + 8 moved up by 2 */ var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(1).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_3).intoArray(out, outpos); + outpos += 
VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 7).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 13).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 19).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 22).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 25).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 28).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + /* second straddling value: the top bit of the input at inpos + 8 combined with the two lowest bits of the input at inpos + 16 moved up by 1 */ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_3); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 5).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 11).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 14).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 17).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 20).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 23).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 26).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 
29).and(MASK_3).intoArray(out, outpos); + outpos += VLEN_256; + } + + private static void fastunpack4(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + iV.and(MASK_4).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos); + outpos += VLEN_512; + + var oV = iV.and(MASK_4); + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(0xf).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos); + outpos += VLEN_512; + } + + private static void fastunpack5(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = 
IntVector.fromArray(SPECIES_256, in, inpos); + iV.and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 5).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 15).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 20).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 25).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(7).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 3).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 13).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 18).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 23).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(1).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 11).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + 
iV.lanewise(VectorOperators.LSHR, 21).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 26).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 9).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 14).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 19).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 24).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_5); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 7).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 17).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 22).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 27).and(MASK_5).intoArray(out, outpos); + outpos += VLEN_256; + } + + private static void fastunpack6(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + iV.and(MASK_6).intoArray(out, outpos); + outpos += VLEN_512; + + 
iV.lanewise(VectorOperators.LSHR, 6).and(MASK_6).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_6).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 18).and(MASK_6).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 24).and(MASK_6).intoArray(out, outpos); + outpos += VLEN_512; + + var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_6); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_6).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_6).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_6).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 22).and(MASK_6).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_6); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_6).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_6).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 14).and(MASK_6).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 20).and(MASK_6).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 26).and(MASK_6).intoArray(out, outpos); + outpos += VLEN_512; + } + + private static void fastunpack7(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + iV.and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + 
iV.lanewise(VectorOperators.LSHR, 7).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 14).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 21).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + var oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(7).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 3).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 17).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 24).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 13).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 20).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 9).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + 
iV.lanewise(VectorOperators.LSHR, 23).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 5).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 19).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(1).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 15).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 22).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_7); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 11).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 18).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 25).and(MASK_7).intoArray(out, outpos); + outpos += VLEN_256; + } + + private static void fastunpack8(final int[] in, int inpos, final int[] out, + int outpos) { + var iV 
= IntVector.fromArray(SPECIES_512, in, inpos); + iV.and(MASK_8).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos); + outpos += VLEN_512; + + var oV = iV.and(MASK_8); + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(0xff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(0xff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(0xff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos); + outpos += VLEN_512; + } + + private static void fastunpack9(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, 
inpos); + iV.and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 9).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 18).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + var oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 13).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 22).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 17).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(7).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 3).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 21).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += 
VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 7).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 11).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 20).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 15).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(1).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 19).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_9); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 
5).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 14).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 23).and(MASK_9).intoArray(out, outpos); + outpos += VLEN_256; + } + + private static void fastunpack10(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + iV.and(MASK_10).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_10).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 20).and(MASK_10).intoArray(out, outpos); + outpos += VLEN_512; + + var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_10); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_10).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 18).and(MASK_10).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_10); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_10).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_10).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_10); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_10).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 14).and(MASK_10).intoArray(out, outpos); + outpos += VLEN_512; + + 
oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_10); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_10).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_10).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 22).and(MASK_10).intoArray(out, outpos); + outpos += VLEN_512; + } + + private static void fastunpack11(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + iV.and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 11).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + var oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(1).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 13).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(7).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 
3).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 14).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 15).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 5).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 17).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 7).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 18).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_11); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 19).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 9).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 20).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_11); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 21).and(MASK_11).intoArray(out, outpos); + outpos += VLEN_256; + } + + private static void fastunpack12(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + iV.and(MASK_12).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_12).intoArray(out, outpos); + outpos += VLEN_512; + + var oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_12).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_12).intoArray(out, outpos); + outpos += 
VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_12).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 20).and(MASK_12).intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(0xfff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_12).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_12).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_12).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_12); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_12).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 20).and(MASK_12).intoArray(out, outpos); + outpos += VLEN_512; + } + + private static void fastunpack13(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + iV.and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 13).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + var oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_13); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 7).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(1).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 14).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 15).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 9).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(7).lanewise(VectorOperators.LSHL, 10).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 3).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 17).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 11).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 5).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 18).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, 
outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_13); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 19).and(MASK_13).intoArray(out, outpos); + outpos += VLEN_256; + } + + private static void fastunpack14(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + iV.and(MASK_14).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 14).and(MASK_14).intoArray(out, outpos); + outpos += VLEN_512; + + var oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_14); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_14).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_14); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_14).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_14); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_14).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_14).intoArray(out, outpos); + outpos += VLEN_512; 
+ + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_14); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_14).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_14); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_14).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_14); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_14).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 18).and(MASK_14).intoArray(out, outpos); + outpos += VLEN_512; + } + + private static void fastunpack15(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + iV.and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 15).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 13).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += 
VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 11).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 9).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 7).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 5).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(7).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 3).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(1).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 
64); + oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 14).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_15); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_15); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 17).and(MASK_15).intoArray(out, outpos); + outpos += VLEN_256; + } + + private static void fastunpack16(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + iV.and(MASK_16).intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos); + outpos += VLEN_512; + + var oV = iV.and(MASK_16); + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(0xffff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(0xffff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(0xffff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(0xffff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(0xffff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + 
iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(0xffff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(0xffff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos); + outpos += VLEN_512; + } + + private static void fastunpack17(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + iV.and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + + var oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV 
= iV.and(0xff).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 14).and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(1).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(7).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, 
outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 3).and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 5).and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 7).and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 9).and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 11).and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 13).and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_17); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 
2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 15).and(MASK_17).intoArray(out, outpos); + outpos += VLEN_256; + } + + private static void fastunpack18(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + iV.and(MASK_18).intoArray(out, outpos); + outpos += VLEN_512; + + var oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_18); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_18).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_18); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_18).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_18); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_18).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_18); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_18); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_18).intoArray(out, outpos); + outpos += VLEN_512; + + oV = 
iV.lanewise(VectorOperators.LSHR, 20).and(MASK_18); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_18).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_18); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_18).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_18); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 14).and(MASK_18).intoArray(out, outpos); + outpos += VLEN_512; + } + + private static void fastunpack19(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + iV.and(MASK_19).intoArray(out, outpos); + outpos += VLEN_256; + + var oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_19).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_19).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_19); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 5).and(MASK_19).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 11).and(MASK_19).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(0x1ffff).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_19).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_19).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = 
iV.lanewise(VectorOperators.LSHR, 16).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(7).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 3).and(MASK_19).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 9).and(MASK_19).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_19).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_19).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(1).lanewise(VectorOperators.LSHL, 18).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_19).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 7).and(MASK_19).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_19); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 13).and(MASK_19).intoArray(out, outpos); + outpos += VLEN_256; + } + + private static void fastunpack20(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + iV.and(MASK_20).intoArray(out, outpos); + outpos += VLEN_512; + + var oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_20).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_20).intoArray(out, outpos); + outpos += VLEN_512; + + oV = 
iV.lanewise(VectorOperators.LSHR, 24).and(MASK_20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_20).intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(0xfffff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_20).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_20).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_20); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 12).and(MASK_20).intoArray(out, outpos); + outpos += VLEN_512; + } + + private static void fastunpack21(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + iV.and(MASK_21).intoArray(out, outpos); + outpos += VLEN_256; + + var oV = 
iV.lanewise(VectorOperators.LSHR, 21).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_21).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 9).and(MASK_21).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(0x7ffff).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_21).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 
7).and(MASK_21).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(0x1ffff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_21).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 5).and(MASK_21).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_21).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + 
outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(7).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 3).and(MASK_21).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_21).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(1).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_21).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_21); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 11).and(MASK_21).intoArray(out, outpos); + outpos += VLEN_256; + } + + private static void fastunpack22(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + 
iV.and(MASK_22).intoArray(out, outpos); + outpos += VLEN_512; + + var oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_22); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_22); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_22).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_22); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_22); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_22).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_22); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_22); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_22).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_22); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + 
outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_22); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_22).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_22); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_22); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 10).and(MASK_22).intoArray(out, outpos); + outpos += VLEN_512; + } + + private static void fastunpack23(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + iV.and(MASK_23).intoArray(out, outpos); + outpos += VLEN_256; + + var oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 5).and(MASK_23).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(0x7ffff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = 
iV.lanewise(VectorOperators.LSHR, 19).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(1).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_23).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_23).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_23).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 
25).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 7).and(MASK_23).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(0x1fffff).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(7).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 3).and(MASK_23).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(0x1ffff).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_23).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_23); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(0x3fffff).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_23).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_23); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 9).and(MASK_23).intoArray(out, outpos); + outpos += VLEN_256; + } + + private static void fastunpack24(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + iV.and(MASK_24).intoArray(out, outpos); + outpos += VLEN_512; + + var oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + 
outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(0xffffff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(0xffffff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(0xffffff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 8).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos); + outpos += VLEN_512; + } + + private static void fastunpack25(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + iV.and(MASK_25).intoArray(out, outpos); + outpos += VLEN_256; + + var oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_25).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(0x3fffff).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, 
in, inpos + 48); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(1).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_25).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(0x7ffff).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 5).and(MASK_25).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(0x7fffff).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = 
iV.lanewise(VectorOperators.LSHR, 9).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_25).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_25).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(0xffffff).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(0x1ffff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = 
iV.and(7).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 3).and(MASK_25).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(0x1fffff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_25); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 7).and(MASK_25).intoArray(out, outpos); + outpos += VLEN_256; + } + + private static void fastunpack26(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + iV.and(MASK_26).intoArray(out, outpos); + outpos += VLEN_512; + + var oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_26); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_26); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_26); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = 
iV.lanewise(VectorOperators.LSHR, 8).and(MASK_26); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_26).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_26); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(0x3fffff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_26); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_26); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_26); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_26).intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_26); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(0xffffff).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_26); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_26); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = 
iV.and(0xfff).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_26); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 6).and(MASK_26).intoArray(out, outpos); + outpos += VLEN_512; + } + + private static void fastunpack27(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + iV.and(MASK_27).intoArray(out, outpos); + outpos += VLEN_256; + + var oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(0x3fffff).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(0x1ffff).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 7).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_27).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 
29).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(0xffffff).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(0x7ffff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_27).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(0x3ffffff).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(0x1fffff).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = 
iV.lanewise(VectorOperators.LSHR, 16).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(1).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_27).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(0x7fffff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(7).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + 
iV.lanewise(VectorOperators.LSHR, 3).and(MASK_27).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(0x1ffffff).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_27); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 5).and(MASK_27).intoArray(out, outpos); + outpos += VLEN_256; + } + + private static void fastunpack28(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + iV.and(MASK_28).intoArray(out, outpos); + outpos += VLEN_512; + + var oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_28); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(0xffffff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_28); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = 
iV.and(0xfffff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_28); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_28); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_28); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_28); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_28).intoArray(out, outpos); + outpos += VLEN_512; + + oV = oV.zero(SPECIES_512); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(0xfffffff).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_28); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(0xffffff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_28); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_28); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 12).or(oV); + 
+ oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_28); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_28); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_28); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 4).and(MASK_28).intoArray(out, outpos); + outpos += VLEN_512; + } + + private static void fastunpack29(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + iV.and(MASK_29).intoArray(out, outpos); + outpos += VLEN_256; + + var oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(0x3ffffff).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(0x7fffff).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(0x1ffff).lanewise(VectorOperators.LSHL, 12).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 5).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_29).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(0xfffffff).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(0x1ffffff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(0x3fffff).lanewise(VectorOperators.LSHL, 7).or(oV); + + 
oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(0x7ffff).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 7).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(1).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_29).intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(0x7ffffff).lanewise(VectorOperators.LSHL, 2).or(oV); + 
+ oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(0xffffff).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(0x1fffff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_29); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(7).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 
3).and(MASK_29).intoArray(out, outpos); + outpos += VLEN_256; + } + + private static void fastunpack30(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + iV.and(MASK_30).intoArray(out, outpos); + outpos += VLEN_512; + + var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_30); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 16); + oV = iV.and(0xfffffff).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_30); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 32); + oV = iV.and(0x3ffffff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_30); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 48); + oV = iV.and(0xffffff).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_30); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 64); + oV = iV.and(0x3fffff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_30); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 80); + oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_30); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 96); + oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_30); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 112); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = 
iV.lanewise(VectorOperators.LSHR, 16).and(MASK_30); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 128); + oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_30); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 144); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_30); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 160); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_30); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 176); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_30); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 192); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_30); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 208); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_30); + + iV = IntVector.fromArray(SPECIES_512, in, inpos + 224); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_512; + + iV.lanewise(VectorOperators.LSHR, 2).and(MASK_30).intoArray(out, outpos); + outpos += VLEN_512; + } + + private static void fastunpack31(final int[] in, int inpos, final int[] out, + int outpos) { + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + iV.and(MASK_31).intoArray(out, outpos); + outpos += VLEN_256; + + var oV = 
iV.lanewise(VectorOperators.LSHR, 31).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); + oV = iV.and(0x3fffffff).lanewise(VectorOperators.LSHL, 1).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); + oV = iV.and(0x1fffffff).lanewise(VectorOperators.LSHL, 2).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); + oV = iV.and(0xfffffff).lanewise(VectorOperators.LSHL, 3).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); + oV = iV.and(0x7ffffff).lanewise(VectorOperators.LSHL, 4).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); + oV = iV.and(0x3ffffff).lanewise(VectorOperators.LSHL, 5).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); + oV = iV.and(0x1ffffff).lanewise(VectorOperators.LSHL, 6).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 56); + oV = iV.and(0xffffff).lanewise(VectorOperators.LSHL, 7).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 64); + oV = iV.and(0x7fffff).lanewise(VectorOperators.LSHL, 8).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_31); + + iV = 
IntVector.fromArray(SPECIES_256, in, inpos + 72); + oV = iV.and(0x3fffff).lanewise(VectorOperators.LSHL, 9).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 80); + oV = iV.and(0x1fffff).lanewise(VectorOperators.LSHL, 10).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 88); + oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 11).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 96); + oV = iV.and(0x7ffff).lanewise(VectorOperators.LSHL, 12).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 104); + oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 13).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 112); + oV = iV.and(0x1ffff).lanewise(VectorOperators.LSHL, 14).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); + oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 15).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 128); + oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 16).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 136); + oV = 
iV.and(0x3fff).lanewise(VectorOperators.LSHL, 17).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 144); + oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 18).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 152); + oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 19).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 160); + oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 20).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 168); + oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 21).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 176); + oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 22).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 184); + oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 23).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); + oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 24).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 7).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 200); + oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 25).or(oV); + + oV.intoArray(out, outpos); + outpos += 
VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 208); + oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 26).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 5).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 216); + oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 27).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 224); + oV = iV.and(7).lanewise(VectorOperators.LSHL, 28).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 3).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 232); + oV = iV.and(3).lanewise(VectorOperators.LSHL, 29).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + oV = iV.lanewise(VectorOperators.LSHR, 2).and(MASK_31); + + iV = IntVector.fromArray(SPECIES_256, in, inpos + 240); + oV = iV.and(1).lanewise(VectorOperators.LSHL, 30).or(oV); + + oV.intoArray(out, outpos); + outpos += VLEN_256; + + iV.lanewise(VectorOperators.LSHR, 1).and(MASK_31).intoArray(out, outpos); + outpos += VLEN_256; + } +} diff --git a/src/main/java/me/lemire/integercompression/vector/VectorBitPackerTerse.java b/src/main/java/me/lemire/integercompression/vector/VectorBitPackerTerse.java new file mode 100644 index 0000000..62a8cc7 --- /dev/null +++ b/src/main/java/me/lemire/integercompression/vector/VectorBitPackerTerse.java @@ -0,0 +1,963 @@ +// Copyright (C) 2022 Intel Corporation + +// SPDX-License-Identifier: Apache-2.0 + +package me.lemire.integercompression.vector; + +import java.util.Arrays; +import jdk.incubator.vector.*; + +/** + * This is a readable but less efficient version of the VectorBitPacker class. 
+ * + */ +public class VectorBitPackerTerse { + static final VectorSpecies SPECIES_512 = IntVector.SPECIES_512; + static final VectorSpecies SPECIES_256 = IntVector.SPECIES_256; + static final int VLEN_512 = 16; + static final int VLEN_256 = 8; + static final int BLOCK_SIZE = 256; + + private static void fastpackOddBit(final int[] in, int inpos, final int[] out, + int outpos, int b, final int[] ho, + final int[] lc) { + final int mask = (1 << b) - 1; + final int N = 31 / b; + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV.and(mask); + int n = 1; + for (; n <= N; n++) { + iV = IntVector.fromArray(SPECIES_256, in, inpos + n * VLEN_256); + oV = iV.and(mask).lanewise(VectorOperators.LSHL, b * n).or(oV); + } + oV.intoArray(out, outpos); + outpos += VLEN_256; + + final int L = b - 1; + for (int i = 0; i < L; i++) { + oV = iV.and(mask).lanewise(VectorOperators.LSHR, ho[i]); + for (int j = 0; j < lc[i]; j++) { + iV = IntVector.fromArray(SPECIES_256, in, inpos + n * VLEN_256); + oV = iV.and(mask) + .lanewise(VectorOperators.LSHL, b * j + (b - ho[i])) + .or(oV); + n++; + } + oV.intoArray(out, outpos); + outpos += VLEN_256; + } + } + + private static void fastpackOddBitNoMask(final int[] in, int inpos, + final int[] out, int outpos, int b, + final int[] ho, final int[] lc) { + final int N = 31 / b; + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + var oV = iV; + int n = 1; + for (; n <= N; n++) { + iV = IntVector.fromArray(SPECIES_256, in, inpos + n * VLEN_256); + oV = iV.lanewise(VectorOperators.LSHL, b * n).or(oV); + } + oV.intoArray(out, outpos); + outpos += VLEN_256; + + final int L = b - 1; + for (int i = 0; i < L; i++) { + oV = iV.lanewise(VectorOperators.LSHR, ho[i]); + for (int j = 0; j < lc[i]; j++) { + iV = IntVector.fromArray(SPECIES_256, in, inpos + n * VLEN_256); + oV = iV.lanewise(VectorOperators.LSHL, b * j + (b - ho[i])).or(oV); + n++; + } + oV.intoArray(out, outpos); + outpos += VLEN_256; + } + } + + private static void 
fastUnpackOddBit(final int[] in, int inpos, + final int[] out, int outpos, int b, + final int[] lo, int[] masks, int[] lc) { + final int mask = (1 << b) - 1; + final int N = 32 / b; + var iV = IntVector.fromArray(SPECIES_256, in, inpos); + int n = 0; + for (; n < N; n++) { + iV.lanewise(VectorOperators.LSHR, b * n).and(mask).intoArray(out, outpos); + outpos += VLEN_256; + } + var oV = iV.lanewise(VectorOperators.LSHR, b * n).and(mask); + + final int L = b - 1; + for (int i = 0; i < L; i++) { + iV = IntVector.fromArray(SPECIES_256, in, inpos + (i + 1) * VLEN_256); + oV = iV.and(masks[i]).lanewise(VectorOperators.LSHL, b - lo[i]).or(oV); + oV.intoArray(out, outpos); + outpos += VLEN_256; + int j = 0; + for (; j < lc[i]; j++) { + iV.lanewise(VectorOperators.LSHR, b * j + lo[i]) + .and(mask) + .intoArray(out, outpos); + outpos += VLEN_256; + n++; + } + oV = iV.lanewise(VectorOperators.LSHR, b * j + lo[i]).and(mask); + } + } + + private static void fastpackEvenBit(final int[] in, int inpos, + final int[] out, int outpos, int b, + final int[] ho, final int[] lc) { + final int mask = (1 << b) - 1; + final int N = 32 % b == 0 ? 
(32 / b) - 1 : 32 / b; + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV.and(mask); + int n = 1; + for (; n <= N; n++) { + iV = IntVector.fromArray(SPECIES_512, in, inpos + n * VLEN_512); + oV = iV.and(mask).lanewise(VectorOperators.LSHL, b * n).or(oV); + } + oV.intoArray(out, outpos); + outpos += VLEN_512; + + final int L = (b >>> 1) - 1; + for (int i = 0; i < L; i++) { + if (ho[i] != b) + oV = iV.and(mask).lanewise(VectorOperators.LSHR, ho[i]); + else + oV = oV.zero(SPECIES_512); + for (int j = 0; j < lc[i]; j++) { + iV = IntVector.fromArray(SPECIES_512, in, inpos + n * VLEN_512); + oV = iV.and(mask) + .lanewise(VectorOperators.LSHL, b * j + (b - ho[i])) + .or(oV); + n++; + } + oV.intoArray(out, outpos); + outpos += VLEN_512; + } + } + + private static void fastpackEvenBitNoMask(final int[] in, int inpos, + final int[] out, int outpos, int b, + final int[] ho, final int[] lc) { + final int N = 32 % b == 0 ? (32 / b) - 1 : 32 / b; + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + var oV = iV; + int n = 1; + for (; n <= N; n++) { + iV = IntVector.fromArray(SPECIES_512, in, inpos + n * VLEN_512); + oV = iV.lanewise(VectorOperators.LSHL, b * n).or(oV); + } + oV.intoArray(out, outpos); + outpos += VLEN_512; + + final int L = (b >>> 1) - 1; + for (int i = 0; i < L; i++) { + if (ho[i] != b) + oV = iV.lanewise(VectorOperators.LSHR, ho[i]); + else + oV = oV.zero(SPECIES_512); + for (int j = 0; j < lc[i]; j++) { + iV = IntVector.fromArray(SPECIES_512, in, inpos + n * VLEN_512); + oV = iV.lanewise(VectorOperators.LSHL, b * j + (b - ho[i])).or(oV); + n++; + } + oV.intoArray(out, outpos); + outpos += VLEN_512; + } + } + + private static void fastUnpackEventBit(final int[] in, int inpos, + final int[] out, int outpos, int b, + final int[] lo, int[] masks, + int[] lc) { + final int mask = (1 << b) - 1; + final int N = 32 / b; + var iV = IntVector.fromArray(SPECIES_512, in, inpos); + int n = 0; + for (; n < N; n++) { + 
iV.lanewise(VectorOperators.LSHR, b * n).and(mask).intoArray(out, outpos); + outpos += VLEN_512; + } + var oV = iV.lanewise(VectorOperators.LSHR, b * n).and(mask); + if ((b & (b - 1)) == 0) + oV = oV.zero(SPECIES_512); + + final int L = (b >>> 1) - 1; + for (int i = 0; i < L; i++) { + iV = IntVector.fromArray(SPECIES_512, in, inpos + (i + 1) * VLEN_512); + oV = iV.and(masks[i]).lanewise(VectorOperators.LSHL, b - lo[i]).or(oV); + oV.intoArray(out, outpos); + outpos += VLEN_512; + int j = 0; + for (; j < lc[i]; j++) { + iV.lanewise(VectorOperators.LSHR, b * j + lo[i]) + .and(mask) + .intoArray(out, outpos); + outpos += VLEN_512; + n++; + } + if ((32 - lo[i]) % b != 0) + oV = iV.lanewise(VectorOperators.LSHR, b * j + lo[i]).and(mask); + else + oV = oV.zero(SPECIES_512); + } + } + + public static int slowpack(final int[] in, int inpos, int inlen, + final int[] out, int outpos, int b) { + if (inlen == 0) + return outpos; + if (b == 32) { + System.arraycopy(in, inpos, out, outpos, inlen); + return outpos + inlen; + } + int mask = (1 << b) - 1; + int c = 0; + int l = 0; + int r = 0; + int val = 0; + for (int i = 0; i < inlen; i++) { + val = in[inpos + i] & mask; + out[outpos] |= val << (c + r); + c += b; + l = (32 - r) % b; + if (c + r >= 32) { + if (i < inlen - 1 || l != 0) + outpos++; + r = l == 0 ? 0 : b - l; + if (l != 0) + out[outpos] = val >> (b - r); + c = 0; + } + } + return outpos; + } + + public static int slowunpack(final int[] in, int inpos, final int[] out, + int outpos, int outlen, int b) { + if (outlen == 0) { + return inpos; + } + if (b == 32) { + System.arraycopy(in, inpos, out, outpos, outlen); + return inpos + outlen; + } + int mask = (1 << b) - 1; + int limit = outpos + outlen; + int r = 0; + int val = 0; + int i = 0; + for (; outpos < limit; i++) { + if (r > 0) + out[outpos++] = + (val >>> (32 - (b - r))) | ((in[inpos + i] << (b - r)) & mask); + val = in[inpos + i]; + int j = 0; + int l = 32 - r; + int ll = l % b == 0 ? 
l : l - b; + while (j < ll && outpos < limit) { + out[outpos++] = (val >> (j + r)) & mask; + j += b; + } + r = l % b == 0 ? 0 : b - (l % b); + } + return inpos + i; + } + + public static int numCompressedInts(int n, int b) { + int width = b % 2 == 0 ? VLEN_512 : VLEN_256; + if (n <= width) + return n; + int intsPerVec = (32 / b) * width; + int q = (n + intsPerVec - 1) / intsPerVec; + return q * width; + } + + public static void fastpack(final int[] in, int inpos, final int[] out, + int outpos, int b) { + switch (b) { + case 0: + break; + case 1: + fastpackOddBit(in, inpos, out, outpos, 1, new int[] {}, new int[] {}); + break; + case 2: + fastpackEvenBit(in, inpos, out, outpos, 2, new int[] {}, new int[] {}); + break; + case 3: + fastpackOddBit(in, inpos, out, outpos, 3, new int[] {0x2, 0x1}, + new int[] {0xb, 0xa}); + break; + case 4: + fastpackEvenBit(in, inpos, out, outpos, 4, new int[] {0x4}, + new int[] {0x8}); + break; + case 5: + fastpackOddBit(in, inpos, out, outpos, 5, new int[] {0x2, 0x4, 0x1, 0x3}, + new int[] {0x6, 0x7, 0x6, 0x6}); + break; + case 6: + fastpackEvenBit(in, inpos, out, outpos, 6, new int[] {0x2, 0x4}, + new int[] {0x5, 0x5}); + break; + case 7: + fastpackOddBit(in, inpos, out, outpos, 7, + new int[] {0x4, 0x1, 0x5, 0x2, 0x6, 0x3}, + new int[] {0x5, 0x4, 0x5, 0x4, 0x5, 0x4}); + break; + case 8: + fastpackEvenBit(in, inpos, out, outpos, 8, new int[] {0x8, 0x8, 0x8}, + new int[] {0x4, 0x4, 0x4}); + break; + case 9: + fastpackOddBit(in, inpos, out, outpos, 9, + new int[] {0x5, 0x1, 0x6, 0x2, 0x7, 0x3, 0x8, 0x4}, + new int[] {0x4, 0x3, 0x4, 0x3, 0x4, 0x3, 0x4, 0x3}); + break; + case 10: + fastpackEvenBit(in, inpos, out, outpos, 10, + new int[] {0x2, 0x4, 0x6, 0x8}, + new int[] {0x3, 0x3, 0x3, 0x3}); + break; + case 11: + fastpackOddBit( + in, inpos, out, outpos, 11, + new int[] {0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1}, + new int[] {0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x2}); + break; + case 12: + fastpackEvenBit(in, inpos, 
out, outpos, 12, + new int[] {0x8, 0x4, 0xc, 0x8, 0x4}, + new int[] {0x3, 0x2, 0x3, 0x3, 0x2}); + break; + case 13: + fastpackOddBit(in, inpos, out, outpos, 13, + new int[] {0x6, 0xc, 0x5, 0xb, 0x4, 0xa, 0x3, 0x9, 0x2, + 0x8, 0x1, 0x7}, + new int[] {0x2, 0x3, 0x2, 0x3, 0x2, 0x3, 0x2, 0x3, 0x2, + 0x3, 0x2, 0x2}); + break; + case 14: + fastpackEvenBit(in, inpos, out, outpos, 14, + new int[] {0x4, 0x8, 0xc, 0x2, 0x6, 0xa}, + new int[] {0x2, 0x2, 0x3, 0x2, 0x2, 0x2}); + break; + case 15: + fastpackOddBit(in, inpos, out, outpos, 15, + new int[] {0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x1, 0x3, + 0x5, 0x7, 0x9, 0xb, 0xd}, + new int[] {0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x3, 0x2, 0x2, + 0x2, 0x2, 0x2, 0x2, 0x2}); + break; + case 16: + fastpackEvenBit(in, inpos, out, outpos, 16, + new int[] {0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10}, + new int[] {0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2}); + break; + case 17: + fastpackOddBit(in, inpos, out, outpos, 17, + new int[] {0xf, 0xd, 0xb, 0x9, 0x7, 0x5, 0x3, 0x1, 0x10, + 0xe, 0xc, 0xa, 0x8, 0x6, 0x4, 0x2}, + new int[] {0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x1, 0x2, + 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x1}); + break; + case 18: + fastpackEvenBit(in, inpos, out, outpos, 18, + new int[] {0xe, 0xa, 0x6, 0x2, 0x10, 0xc, 0x8, 0x4}, + new int[] {0x2, 0x2, 0x2, 0x1, 0x2, 0x2, 0x2, 0x1}); + break; + case 19: + fastpackOddBit(in, inpos, out, outpos, 19, + new int[] {0xd, 0x7, 0x1, 0xe, 0x8, 0x2, 0xf, 0x9, 0x3, + 0x10, 0xa, 0x4, 0x11, 0xb, 0x5, 0x12, 0xc, 0x6}, + new int[] {0x2, 0x2, 0x1, 0x2, 0x2, 0x1, 0x2, 0x2, 0x1, + 0x2, 0x2, 0x1, 0x2, 0x2, 0x1, 0x2, 0x2, 0x1}); + break; + case 20: + fastpackEvenBit( + in, inpos, out, outpos, 20, + new int[] {0xc, 0x4, 0x10, 0x8, 0x14, 0xc, 0x4, 0x10, 0x8}, + new int[] {0x2, 0x1, 0x2, 0x1, 0x2, 0x2, 0x1, 0x2, 0x1}); + break; + case 21: + fastpackOddBit( + in, inpos, out, outpos, 21, + new int[] {0xb, 0x1, 0xc, 0x2, 0xd, 0x3, 0xe, 0x4, 0xf, 0x5, + 0x10, 0x6, 0x11, 0x7, 0x12, 0x8, 0x13, 0x9, 0x14, 0xa}, + new int[] {0x2, 0x1, 
0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, + 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1}); + break; + case 22: + fastpackEvenBit( + in, inpos, out, outpos, 22, + new int[] {0xa, 0x14, 0x8, 0x12, 0x6, 0x10, 0x4, 0xe, 0x2, 0xc}, + new int[] {0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x1}); + break; + case 23: + fastpackOddBit(in, inpos, out, outpos, 23, + new int[] {0x9, 0x12, 0x4, 0xd, 0x16, 0x8, 0x11, 0x3, + 0xc, 0x15, 0x7, 0x10, 0x2, 0xb, 0x14, 0x6, + 0xf, 0x1, 0xa, 0x13, 0x5, 0xe}, + new int[] {0x1, 0x2, 0x1, 0x1, 0x2, 0x1, 0x2, 0x1, + 0x1, 0x2, 0x1, 0x2, 0x1, 0x1, 0x2, 0x1, + 0x2, 0x1, 0x1, 0x2, 0x1, 0x1}); + break; + case 24: + fastpackEvenBit( + in, inpos, out, outpos, 24, + new int[] {0x8, 0x10, 0x18, 0x8, 0x10, 0x18, 0x8, 0x10, 0x18, 0x8, + 0x10}, + new int[] {0x1, 0x1, 0x2, 0x1, 0x1, 0x2, 0x1, 0x1, 0x2, 0x1, 0x1}); + break; + case 25: + fastpackOddBit(in, inpos, out, outpos, 25, + new int[] {0x7, 0xe, 0x15, 0x3, 0xa, 0x11, 0x18, 0x6, + 0xd, 0x14, 0x2, 0x9, 0x10, 0x17, 0x5, 0xc, + 0x13, 0x1, 0x8, 0xf, 0x16, 0x4, 0xb, 0x12}, + new int[] {0x1, 0x1, 0x2, 0x1, 0x1, 0x1, 0x2, 0x1, + 0x1, 0x2, 0x1, 0x1, 0x1, 0x2, 0x1, 0x1, + 0x2, 0x1, 0x1, 0x1, 0x2, 0x1, 0x1, 0x1}); + break; + case 26: + fastpackEvenBit(in, inpos, out, outpos, 26, + new int[] {0x6, 0xc, 0x12, 0x18, 0x4, 0xa, 0x10, 0x16, + 0x2, 0x8, 0xe, 0x14}, + new int[] {0x1, 0x1, 0x1, 0x2, 0x1, 0x1, 0x1, 0x2, 0x1, + 0x1, 0x1, 0x1}); + break; + case 27: + fastpackOddBit(in, inpos, out, outpos, 27, + new int[] {0x5, 0xa, 0xf, 0x14, 0x19, 0x3, 0x8, + 0xd, 0x12, 0x17, 0x1, 0x6, 0xb, 0x10, + 0x15, 0x1a, 0x4, 0x9, 0xe, 0x13, 0x18, + 0x2, 0x7, 0xc, 0x11, 0x16}, + new int[] {0x1, 0x1, 0x1, 0x1, 0x2, 0x1, 0x1, 0x1, 0x1, + 0x2, 0x1, 0x1, 0x1, 0x1, 0x1, 0x2, 0x1, 0x1, + 0x1, 0x1, 0x2, 0x1, 0x1, 0x1, 0x1, 0x1}); + break; + case 28: + fastpackEvenBit(in, inpos, out, outpos, 28, + new int[] {0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1c, 0x4, + 0x8, 0xc, 0x10, 0x14, 0x18}, + new int[] {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x2, 0x1, 
0x1, + 0x1, 0x1, 0x1, 0x1}); + break; + case 29: + fastpackOddBit( + in, inpos, out, outpos, 29, + new int[] {0x3, 0x6, 0x9, 0xc, 0xf, 0x12, 0x15, 0x18, 0x1b, 0x1, + 0x4, 0x7, 0xa, 0xd, 0x10, 0x13, 0x16, 0x19, 0x1c, 0x2, + 0x5, 0x8, 0xb, 0xe, 0x11, 0x14, 0x17, 0x1a}, + new int[] {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x2, 0x1, + 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x2, 0x1, + 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}); + break; + case 30: + fastpackEvenBit(in, inpos, out, outpos, 30, + new int[] {0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, + 0x14, 0x16, 0x18, 0x1a, 0x1c}, + new int[] {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x1, 0x1, 0x1, 0x1, 0x1}); + break; + case 31: + fastpackOddBit(in, inpos, out, outpos, 31, + new int[] {0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, + 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e}, + new int[] {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}); + break; + case 32: + System.arraycopy(in, inpos, out, outpos, 256); + break; + } + } + + public static void fastpackNoMask(final int[] in, int inpos, final int[] out, + int outpos, int b) { + switch (b) { + case 0: + break; + case 1: + fastpackOddBitNoMask(in, inpos, out, outpos, 1, new int[] {}, + new int[] {}); + break; + case 2: + fastpackEvenBitNoMask(in, inpos, out, outpos, 2, new int[] {}, + new int[] {}); + break; + case 3: + fastpackOddBitNoMask(in, inpos, out, outpos, 3, new int[] {0x2, 0x1}, + new int[] {0xb, 0xa}); + break; + case 4: + fastpackEvenBitNoMask(in, inpos, out, outpos, 4, new int[] {0x4}, + new int[] {0x8}); + break; + case 5: + fastpackOddBitNoMask(in, inpos, out, outpos, 5, + new int[] {0x2, 0x4, 0x1, 0x3}, + new int[] {0x6, 0x7, 0x6, 0x6}); + break; + case 6: + fastpackEvenBitNoMask(in, inpos, out, outpos, 6, new int[] {0x2, 0x4}, + new int[] {0x5, 0x5}); + break; + 
  /**
   * Bit-width dispatcher for the "no mask" packing routines.
   *
   * <ul>
   * <li>{@code b == 0}: returns without touching {@code out}.</li>
   * <li>{@code 1 <= b <= 31}: forwards to {@code fastpackOddBitNoMask} (odd b)
   *     or {@code fastpackEvenBitNoMask} (even b), passing {@code b} and two
   *     constant tables specific to that width.</li>
   * <li>{@code b == 32}: copies 256 ints from {@code in[inpos..]} to
   *     {@code out[outpos..]} unchanged.</li>
   * <li>any other {@code b}: no case matches and nothing happens.</li>
   * </ul>
   *
   * NOTE(review): the helpers are not visible here. The "NoMask" suffix
   * suggests callers must guarantee every value already fits in {@code b}
   * bits - confirm against fastpackOddBitNoMask/fastpackEvenBitNoMask.
   * NOTE(review): the meaning of the two tables is defined by those helpers;
   * treat the constants as opaque and do not edit them by hand.
   *
   * @param in     source array
   * @param inpos  first index read in {@code in}
   * @param out    destination array
   * @param outpos first index written in {@code out}
   * @param b      bit width selecting the case
   */
  public static void fastpackNoMask(final int[] in, int inpos, final int[] out,
                                    int outpos, int b) {
    switch (b) {
    case 0:
      // Width 0: nothing to store.
      break;
    case 1:
      fastpackOddBitNoMask(in, inpos, out, outpos, 1, new int[] {},
                           new int[] {});
      break;
    case 2:
      fastpackEvenBitNoMask(in, inpos, out, outpos, 2, new int[] {},
                            new int[] {});
      break;
    case 3:
      fastpackOddBitNoMask(in, inpos, out, outpos, 3, new int[] {0x2, 0x1},
                           new int[] {0xb, 0xa});
      break;
    case 4:
      fastpackEvenBitNoMask(in, inpos, out, outpos, 4, new int[] {0x4},
                            new int[] {0x8});
      break;
    case 5:
      fastpackOddBitNoMask(in, inpos, out, outpos, 5,
                           new int[] {0x2, 0x4, 0x1, 0x3},
                           new int[] {0x6, 0x7, 0x6, 0x6});
      break;
    case 6:
      fastpackEvenBitNoMask(in, inpos, out, outpos, 6, new int[] {0x2, 0x4},
                            new int[] {0x5, 0x5});
      break;
    case 7:
      fastpackOddBitNoMask(in, inpos, out, outpos, 7,
                           new int[] {0x4, 0x1, 0x5, 0x2, 0x6, 0x3},
                           new int[] {0x5, 0x4, 0x5, 0x4, 0x5, 0x4});
      break;
    case 8:
      fastpackEvenBitNoMask(in, inpos, out, outpos, 8,
                            new int[] {0x8, 0x8, 0x8},
                            new int[] {0x4, 0x4, 0x4});
      break;
    case 9:
      fastpackOddBitNoMask(in, inpos, out, outpos, 9,
                           new int[] {0x5, 0x1, 0x6, 0x2, 0x7, 0x3, 0x8, 0x4},
                           new int[] {0x4, 0x3, 0x4, 0x3, 0x4, 0x3, 0x4, 0x3});
      break;
    case 10:
      fastpackEvenBitNoMask(in, inpos, out, outpos, 10,
                            new int[] {0x2, 0x4, 0x6, 0x8},
                            new int[] {0x3, 0x3, 0x3, 0x3});
      break;
    case 11:
      fastpackOddBitNoMask(
          in, inpos, out, outpos, 11,
          new int[] {0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1},
          new int[] {0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x2});
      break;
    case 12:
      fastpackEvenBitNoMask(in, inpos, out, outpos, 12,
                            new int[] {0x8, 0x4, 0xc, 0x8, 0x4},
                            new int[] {0x3, 0x2, 0x3, 0x3, 0x2});
      break;
    case 13:
      fastpackOddBitNoMask(in, inpos, out, outpos, 13,
                           new int[] {0x6, 0xc, 0x5, 0xb, 0x4, 0xa, 0x3, 0x9,
                                      0x2, 0x8, 0x1, 0x7},
                           new int[] {0x2, 0x3, 0x2, 0x3, 0x2, 0x3, 0x2, 0x3,
                                      0x2, 0x3, 0x2, 0x2});
      break;
    case 14:
      fastpackEvenBitNoMask(in, inpos, out, outpos, 14,
                            new int[] {0x4, 0x8, 0xc, 0x2, 0x6, 0xa},
                            new int[] {0x2, 0x2, 0x3, 0x2, 0x2, 0x2});
      break;
    case 15:
      fastpackOddBitNoMask(in, inpos, out, outpos, 15,
                           new int[] {0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x1,
                                      0x3, 0x5, 0x7, 0x9, 0xb, 0xd},
                           new int[] {0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x3, 0x2,
                                      0x2, 0x2, 0x2, 0x2, 0x2, 0x2});
      break;
    case 16:
      fastpackEvenBitNoMask(
          in, inpos, out, outpos, 16,
          new int[] {0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10},
          new int[] {0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2});
      break;
    case 17:
      fastpackOddBitNoMask(in, inpos, out, outpos, 17,
                           new int[] {0xf, 0xd, 0xb, 0x9, 0x7, 0x5, 0x3, 0x1,
                                      0x10, 0xe, 0xc, 0xa, 0x8, 0x6, 0x4, 0x2},
                           new int[] {0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x1,
                                      0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x1});
      break;
    case 18:
      fastpackEvenBitNoMask(in, inpos, out, outpos, 18,
                            new int[] {0xe, 0xa, 0x6, 0x2, 0x10, 0xc, 0x8, 0x4},
                            new int[] {0x2, 0x2, 0x2, 0x1, 0x2, 0x2, 0x2, 0x1});
      break;
    case 19:
      fastpackOddBitNoMask(
          in, inpos, out, outpos, 19,
          new int[] {0xd, 0x7, 0x1, 0xe, 0x8, 0x2, 0xf, 0x9, 0x3, 0x10, 0xa,
                     0x4, 0x11, 0xb, 0x5, 0x12, 0xc, 0x6},
          new int[] {0x2, 0x2, 0x1, 0x2, 0x2, 0x1, 0x2, 0x2, 0x1, 0x2, 0x2, 0x1,
                     0x2, 0x2, 0x1, 0x2, 0x2, 0x1});
      break;
    case 20:
      fastpackEvenBitNoMask(
          in, inpos, out, outpos, 20,
          new int[] {0xc, 0x4, 0x10, 0x8, 0x14, 0xc, 0x4, 0x10, 0x8},
          new int[] {0x2, 0x1, 0x2, 0x1, 0x2, 0x2, 0x1, 0x2, 0x1});
      break;
    case 21:
      fastpackOddBitNoMask(
          in, inpos, out, outpos, 21,
          new int[] {0xb, 0x1, 0xc, 0x2, 0xd, 0x3, 0xe, 0x4, 0xf, 0x5,
                     0x10, 0x6, 0x11, 0x7, 0x12, 0x8, 0x13, 0x9, 0x14, 0xa},
          new int[] {0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1,
                     0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1});
      break;
    case 22:
      fastpackEvenBitNoMask(
          in, inpos, out, outpos, 22,
          new int[] {0xa, 0x14, 0x8, 0x12, 0x6, 0x10, 0x4, 0xe, 0x2, 0xc},
          new int[] {0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x1});
      break;
    case 23:
      fastpackOddBitNoMask(
          in, inpos, out, outpos, 23,
          new int[] {0x9, 0x12, 0x4, 0xd, 0x16, 0x8, 0x11, 0x3,
                     0xc, 0x15, 0x7, 0x10, 0x2, 0xb, 0x14, 0x6,
                     0xf, 0x1, 0xa, 0x13, 0x5, 0xe},
          new int[] {0x1, 0x2, 0x1, 0x1, 0x2, 0x1, 0x2, 0x1, 0x1, 0x2, 0x1,
                     0x2, 0x1, 0x1, 0x2, 0x1, 0x2, 0x1, 0x1, 0x2, 0x1, 0x1});
      break;
    case 24:
      fastpackEvenBitNoMask(
          in, inpos, out, outpos, 24,
          new int[] {0x8, 0x10, 0x18, 0x8, 0x10, 0x18, 0x8, 0x10, 0x18, 0x8,
                     0x10},
          new int[] {0x1, 0x1, 0x2, 0x1, 0x1, 0x2, 0x1, 0x1, 0x2, 0x1, 0x1});
      break;
    case 25:
      fastpackOddBitNoMask(in, inpos, out, outpos, 25,
                           new int[] {0x7, 0xe, 0x15, 0x3, 0xa, 0x11,
                                      0x18, 0x6, 0xd, 0x14, 0x2, 0x9,
                                      0x10, 0x17, 0x5, 0xc, 0x13, 0x1,
                                      0x8, 0xf, 0x16, 0x4, 0xb, 0x12},
                           new int[] {0x1, 0x1, 0x2, 0x1, 0x1, 0x1, 0x2, 0x1,
                                      0x1, 0x2, 0x1, 0x1, 0x1, 0x2, 0x1, 0x1,
                                      0x2, 0x1, 0x1, 0x1, 0x2, 0x1, 0x1, 0x1});
      break;
    case 26:
      fastpackEvenBitNoMask(in, inpos, out, outpos, 26,
                            new int[] {0x6, 0xc, 0x12, 0x18, 0x4, 0xa, 0x10,
                                       0x16, 0x2, 0x8, 0xe, 0x14},
                            new int[] {0x1, 0x1, 0x1, 0x2, 0x1, 0x1, 0x1, 0x2,
                                       0x1, 0x1, 0x1, 0x1});
      break;
    case 27:
      fastpackOddBitNoMask(
          in, inpos, out, outpos, 27,
          new int[] {0x5, 0xa, 0xf, 0x14, 0x19, 0x3, 0x8, 0xd, 0x12,
                     0x17, 0x1, 0x6, 0xb, 0x10, 0x15, 0x1a, 0x4, 0x9,
                     0xe, 0x13, 0x18, 0x2, 0x7, 0xc, 0x11, 0x16},
          new int[] {0x1, 0x1, 0x1, 0x1, 0x2, 0x1, 0x1, 0x1, 0x1,
                     0x2, 0x1, 0x1, 0x1, 0x1, 0x1, 0x2, 0x1, 0x1,
                     0x1, 0x1, 0x2, 0x1, 0x1, 0x1, 0x1, 0x1});
      break;
    case 28:
      fastpackEvenBitNoMask(in, inpos, out, outpos, 28,
                            new int[] {0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1c,
                                       0x4, 0x8, 0xc, 0x10, 0x14, 0x18},
                            new int[] {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x2, 0x1,
                                       0x1, 0x1, 0x1, 0x1, 0x1});
      break;
    case 29:
      fastpackOddBitNoMask(
          in, inpos, out, outpos, 29,
          new int[] {0x3, 0x6, 0x9, 0xc, 0xf, 0x12, 0x15, 0x18, 0x1b, 0x1,
                     0x4, 0x7, 0xa, 0xd, 0x10, 0x13, 0x16, 0x19, 0x1c, 0x2,
                     0x5, 0x8, 0xb, 0xe, 0x11, 0x14, 0x17, 0x1a},
          new int[] {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x2, 0x1,
                     0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x2, 0x1,
                     0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1});
      break;
    case 30:
      fastpackEvenBitNoMask(in, inpos, out, outpos, 30,
                            new int[] {0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10,
                                       0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c},
                            new int[] {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
                                       0x1, 0x1, 0x1, 0x1, 0x1, 0x1});
      break;
    case 31:
      fastpackOddBitNoMask(
          in, inpos, out, outpos, 31,
          new int[] {0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
                     0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10,
                     0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
                     0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e},
          new int[] {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
                     0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
                     0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1});
      break;
    case 32:
      // Full-width values need no packing: copy the 256 ints verbatim.
      System.arraycopy(in, inpos, out, outpos, 256);
      break;
    }
  }
  /**
   * Bit-width dispatcher for unpacking.
   *
   * <ul>
   * <li>{@code b == 0}: fills {@code out[outpos .. outpos + 256)} with zeros.</li>
   * <li>{@code 1 <= b <= 31}: forwards to {@code fastUnpackOddBit} (odd b) or
   *     {@code fastUnpackEventBit} (even b), passing {@code b} and three
   *     constant tables specific to that width.</li>
   * <li>{@code b == 32}: copies 256 ints from {@code in[inpos..]} to
   *     {@code out[outpos..]} unchanged.</li>
   * <li>any other {@code b}: no case matches and nothing happens.</li>
   * </ul>
   *
   * NOTE(review): the helpers are not visible here. In every case the second
   * table holds values of the form {@code 2^n - 1}, so it presumably carries
   * bit masks; the role of the first and third tables is defined by the
   * helpers - confirm there and treat the constants as opaque.
   *
   * @param in     array holding the packed data
   * @param inpos  first index read in {@code in}
   * @param out    destination array
   * @param outpos first index written in {@code out}
   * @param b      bit width selecting the case
   */
  public static void fastunpack(final int[] in, int inpos, final int[] out,
                                int outpos, int b) {
    switch (b) {
    case 0:
      // Width 0 encodes an all-zero block.
      Arrays.fill(out, outpos, outpos + 256, 0);
      break;
    case 1:
      fastUnpackOddBit(in, inpos, out, outpos, 1, new int[] {}, new int[] {},
                       new int[] {});
      break;
    case 2:
      fastUnpackEventBit(in, inpos, out, outpos, 2, new int[] {}, new int[] {},
                         new int[] {});
      break;
    case 3:
      fastUnpackOddBit(in, inpos, out, outpos, 3, new int[] {0x1, 0x2},
                       new int[] {0x1, 0x3}, new int[] {0xa, 0xa});
      break;
    case 4:
      fastUnpackEventBit(in, inpos, out, outpos, 4, new int[] {0x4},
                         new int[] {0xf}, new int[] {0x7});
      break;
    case 5:
      fastUnpackOddBit(
          in, inpos, out, outpos, 5, new int[] {0x3, 0x1, 0x4, 0x2},
          new int[] {0x7, 0x1, 0xf, 0x3}, new int[] {0x5, 0x6, 0x5, 0x6});
      break;
    case 6:
      fastUnpackEventBit(in, inpos, out, outpos, 6, new int[] {0x4, 0x2},
                         new int[] {0xf, 0x3}, new int[] {0x4, 0x5});
      break;
    case 7:
      fastUnpackOddBit(in, inpos, out, outpos, 7,
                       new int[] {0x3, 0x6, 0x2, 0x5, 0x1, 0x4},
                       new int[] {0x7, 0x3f, 0x3, 0x1f, 0x1, 0xf},
                       new int[] {0x4, 0x3, 0x4, 0x3, 0x4, 0x4});
      break;
    case 8:
      fastUnpackEventBit(in, inpos, out, outpos, 8, new int[] {0x8, 0x8, 0x8},
                         new int[] {0xff, 0xff, 0xff},
                         new int[] {0x3, 0x3, 0x3});
      break;
    case 9:
      fastUnpackOddBit(in, inpos, out, outpos, 9,
                       new int[] {0x4, 0x8, 0x3, 0x7, 0x2, 0x6, 0x1, 0x5},
                       new int[] {0xf, 0xff, 0x7, 0x7f, 0x3, 0x3f, 0x1, 0x1f},
                       new int[] {0x3, 0x2, 0x3, 0x2, 0x3, 0x2, 0x3, 0x3});
      break;
    case 10:
      fastUnpackEventBit(
          in, inpos, out, outpos, 10, new int[] {0x8, 0x6, 0x4, 0x2},
          new int[] {0xff, 0x3f, 0xf, 0x3}, new int[] {0x2, 0x2, 0x2, 0x3});
      break;
    case 11:
      fastUnpackOddBit(
          in, inpos, out, outpos, 11,
          new int[] {0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa},
          new int[] {0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff},
          new int[] {0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2});
      break;
    case 12:
      fastUnpackEventBit(in, inpos, out, outpos, 12,
                         new int[] {0x4, 0x8, 0xc, 0x4, 0x8},
                         new int[] {0xf, 0xff, 0xfff, 0xf, 0xff},
                         new int[] {0x2, 0x2, 0x1, 0x2, 0x2});
      break;
    case 13:
      fastUnpackOddBit(in, inpos, out, outpos, 13,
                       new int[] {0x7, 0x1, 0x8, 0x2, 0x9, 0x3, 0xa, 0x4, 0xb,
                                  0x5, 0xc, 0x6},
                       new int[] {0x7f, 0x1, 0xff, 0x3, 0x1ff, 0x7, 0x3ff, 0xf,
                                  0x7ff, 0x1f, 0xfff, 0x3f},
                       new int[] {0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1,
                                  0x2, 0x1, 0x2});
      break;
    case 14:
      fastUnpackEventBit(in, inpos, out, outpos, 14,
                         new int[] {0xa, 0x6, 0x2, 0xc, 0x8, 0x4},
                         new int[] {0x3ff, 0x3f, 0x3, 0xfff, 0xff, 0xf},
                         new int[] {0x1, 0x1, 0x2, 0x1, 0x1, 0x2});
      break;
    case 15:
      fastUnpackOddBit(in, inpos, out, outpos, 15,
                       new int[] {0xd, 0xb, 0x9, 0x7, 0x5, 0x3, 0x1, 0xe, 0xc,
                                  0xa, 0x8, 0x6, 0x4, 0x2},
                       new int[] {0x1fff, 0x7ff, 0x1ff, 0x7f, 0x1f, 0x7, 0x1,
                                  0x3fff, 0xfff, 0x3ff, 0xff, 0x3f, 0xf, 0x3},
                       new int[] {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x2, 0x1, 0x1,
                                  0x1, 0x1, 0x1, 0x1, 0x2});
      break;
    case 16:
      fastUnpackEventBit(
          in, inpos, out, outpos, 16,
          new int[] {0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10},
          new int[] {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
          new int[] {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1});
      break;
    case 17:
      fastUnpackOddBit(in, inpos, out, outpos, 17,
                       new int[] {0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x1,
                                  0x3, 0x5, 0x7, 0x9, 0xb, 0xd, 0xf},
                       new int[] {0x3, 0xf, 0x3f, 0xff, 0x3ff, 0xfff, 0x3fff,
                                  0xffff, 0x1, 0x7, 0x1f, 0x7f, 0x1ff, 0x7ff,
                                  0x1fff, 0x7fff},
                       new int[] {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x1,
                                  0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1});
      break;
    case 18:
      fastUnpackEventBit(
          in, inpos, out, outpos, 18,
          new int[] {0x4, 0x8, 0xc, 0x10, 0x2, 0x6, 0xa, 0xe},
          new int[] {0xf, 0xff, 0xfff, 0xffff, 0x3, 0x3f, 0x3ff, 0x3fff},
          new int[] {0x1, 0x1, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1});
      break;
    case 19:
      fastUnpackOddBit(in, inpos, out, outpos, 19,
                       new int[] {0x6, 0xc, 0x12, 0x5, 0xb, 0x11, 0x4, 0xa,
                                  0x10, 0x3, 0x9, 0xf, 0x2, 0x8, 0xe, 0x1, 0x7,
                                  0xd},
                       new int[] {0x3f, 0xfff, 0x3ffff, 0x1f, 0x7ff, 0x1ffff,
                                  0xf, 0x3ff, 0xffff, 0x7, 0x1ff, 0x7fff, 0x3,
                                  0xff, 0x3fff, 0x1, 0x7f, 0x1fff},
                       new int[] {0x1, 0x1, 0x0, 0x1, 0x1, 0x0, 0x1, 0x1, 0x0,
                                  0x1, 0x1, 0x0, 0x1, 0x1, 0x0, 0x1, 0x1, 0x1});
      break;
    case 20:
      fastUnpackEventBit(
          in, inpos, out, outpos, 20,
          new int[] {0x8, 0x10, 0x4, 0xc, 0x14, 0x8, 0x10, 0x4, 0xc},
          new int[] {0xff, 0xffff, 0xf, 0xfff, 0xfffff, 0xff, 0xffff, 0xf,
                     0xfff},
          new int[] {0x1, 0x0, 0x1, 0x1, 0x0, 0x1, 0x0, 0x1, 0x1});
      break;
    case 21:
      fastUnpackOddBit(
          in, inpos, out, outpos, 21,
          new int[] {0xa, 0x14, 0x9, 0x13, 0x8, 0x12, 0x7, 0x11, 0x6, 0x10,
                     0x5, 0xf, 0x4, 0xe, 0x3, 0xd, 0x2, 0xc, 0x1, 0xb},
          new int[] {0x3ff, 0xfffff, 0x1ff, 0x7ffff, 0xff, 0x3ffff, 0x7f,
                     0x1ffff, 0x3f, 0xffff, 0x1f, 0x7fff, 0xf, 0x3fff,
                     0x7, 0x1fff, 0x3, 0xfff, 0x1, 0x7ff},
          new int[] {0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0,
                     0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x1});
      break;
    case 22:
      fastUnpackEventBit(
          in, inpos, out, outpos, 22,
          new int[] {0xc, 0x2, 0xe, 0x4, 0x10, 0x6, 0x12, 0x8, 0x14, 0xa},
          new int[] {0xfff, 0x3, 0x3fff, 0xf, 0xffff, 0x3f, 0x3ffff, 0xff,
                     0xfffff, 0x3ff},
          new int[] {0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1});
      break;
    case 23:
      fastUnpackOddBit(
          in, inpos, out, outpos, 23,
          new int[] {0xe, 0x5, 0x13, 0xa, 0x1, 0xf, 0x6, 0x14,
                     0xb, 0x2, 0x10, 0x7, 0x15, 0xc, 0x3, 0x11,
                     0x8, 0x16, 0xd, 0x4, 0x12, 0x9},
          new int[] {0x3fff, 0x1f, 0x7ffff, 0x3ff, 0x1, 0x7fff,
                     0x3f, 0xfffff, 0x7ff, 0x3, 0xffff, 0x7f,
                     0x1fffff, 0xfff, 0x7, 0x1ffff, 0xff, 0x3fffff,
                     0x1fff, 0xf, 0x3ffff, 0x1ff},
          new int[] {0x0, 0x1, 0x0, 0x0, 0x1, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0,
                     0x1, 0x0, 0x0, 0x1, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0, 0x1});
      break;
    case 24:
      fastUnpackEventBit(
          in, inpos, out, outpos, 24,
          new int[] {0x10, 0x8, 0x18, 0x10, 0x8, 0x18, 0x10, 0x8, 0x18, 0x10,
                     0x8},
          new int[] {0xffff, 0xff, 0xffffff, 0xffff, 0xff, 0xffffff, 0xffff,
                     0xff, 0xffffff, 0xffff, 0xff},
          new int[] {0x0, 0x1, 0x0, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0, 0x0, 0x1});
      break;
    case 25:
      fastUnpackOddBit(
          in, inpos, out, outpos, 25,
          new int[] {0x12, 0xb, 0x4, 0x16, 0xf, 0x8, 0x1, 0x13,
                     0xc, 0x5, 0x17, 0x10, 0x9, 0x2, 0x14, 0xd,
                     0x6, 0x18, 0x11, 0xa, 0x3, 0x15, 0xe, 0x7},
          new int[] {0x3ffff, 0x7ff, 0xf, 0x3fffff, 0x7fff, 0xff,
                     0x1, 0x7ffff, 0xfff, 0x1f, 0x7fffff, 0xffff,
                     0x1ff, 0x3, 0xfffff, 0x1fff, 0x3f, 0xffffff,
                     0x1ffff, 0x3ff, 0x7, 0x1fffff, 0x3fff, 0x7f},
          new int[] {0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x0,
                     0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0,
                     0x1, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x1});
      break;
    case 26:
      fastUnpackEventBit(in, inpos, out, outpos, 26,
                         new int[] {0x14, 0xe, 0x8, 0x2, 0x16, 0x10, 0xa, 0x4,
                                    0x18, 0x12, 0xc, 0x6},
                         new int[] {0xfffff, 0x3fff, 0xff, 0x3, 0x3fffff,
                                    0xffff, 0x3ff, 0xf, 0xffffff, 0x3ffff,
                                    0xfff, 0x3f},
                         new int[] {0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x0,
                                    0x0, 0x0, 0x1});
      break;
    case 27:
      fastUnpackOddBit(
          in, inpos, out, outpos, 27,
          new int[] {0x16, 0x11, 0xc, 0x7, 0x2, 0x18, 0x13, 0xe, 0x9,
                     0x4, 0x1a, 0x15, 0x10, 0xb, 0x6, 0x1, 0x17, 0x12,
                     0xd, 0x8, 0x3, 0x19, 0x14, 0xf, 0xa, 0x5},
          new int[] {0x3fffff, 0x1ffff, 0xfff, 0x7f, 0x3, 0xffffff,
                     0x7ffff, 0x3fff, 0x1ff, 0xf, 0x3ffffff, 0x1fffff,
                     0xffff, 0x7ff, 0x3f, 0x1, 0x7fffff, 0x3ffff,
                     0x1fff, 0xff, 0x7, 0x1ffffff, 0xfffff, 0x7fff,
                     0x3ff, 0x1f},
          new int[] {0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0,
                     0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0,
                     0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x1});
      break;
    case 28:
      fastUnpackEventBit(in, inpos, out, outpos, 28,
                         new int[] {0x18, 0x14, 0x10, 0xc, 0x8, 0x4, 0x1c, 0x18,
                                    0x14, 0x10, 0xc, 0x8, 0x4},
                         new int[] {0xffffff, 0xfffff, 0xffff, 0xfff, 0xff, 0xf,
                                    0xfffffff, 0xffffff, 0xfffff, 0xffff, 0xfff,
                                    0xff, 0xf},
                         new int[] {0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
                                    0x0, 0x0, 0x0, 0x1});
      break;
    case 29:
      fastUnpackOddBit(
          in, inpos, out, outpos, 29,
          new int[] {0x1a, 0x17, 0x14, 0x11, 0xe, 0xb, 0x8, 0x5, 0x2, 0x1c,
                     0x19, 0x16, 0x13, 0x10, 0xd, 0xa, 0x7, 0x4, 0x1, 0x1b,
                     0x18, 0x15, 0x12, 0xf, 0xc, 0x9, 0x6, 0x3},
          new int[] {0x3ffffff, 0x7fffff, 0xfffff, 0x1ffff, 0x3fff,
                     0x7ff, 0xff, 0x1f, 0x3, 0xfffffff,
                     0x1ffffff, 0x3fffff, 0x7ffff, 0xffff, 0x1fff,
                     0x3ff, 0x7f, 0xf, 0x1, 0x7ffffff,
                     0xffffff, 0x1fffff, 0x3ffff, 0x7fff, 0xfff,
                     0x1ff, 0x3f, 0x7},
          new int[] {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0,
                     0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0,
                     0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1});
      break;
    case 30:
      fastUnpackEventBit(in, inpos, out, outpos, 30,
                         new int[] {0x1c, 0x1a, 0x18, 0x16, 0x14, 0x12, 0x10,
                                    0xe, 0xc, 0xa, 0x8, 0x6, 0x4, 0x2},
                         new int[] {0xfffffff, 0x3ffffff, 0xffffff, 0x3fffff,
                                    0xfffff, 0x3ffff, 0xffff, 0x3fff, 0xfff,
                                    0x3ff, 0xff, 0x3f, 0xf, 0x3},
                         new int[] {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
                                    0x0, 0x0, 0x0, 0x0, 0x1});
      break;
    case 31:
      fastUnpackOddBit(
          in, inpos, out, outpos, 31,
          new int[] {0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15,
                     0x14, 0x13, 0x12, 0x11, 0x10, 0xf, 0xe, 0xd, 0xc, 0xb,
                     0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1},
          new int[] {0x3fffffff, 0x1fffffff, 0xfffffff, 0x7ffffff, 0x3ffffff,
                     0x1ffffff, 0xffffff, 0x7fffff, 0x3fffff, 0x1fffff,
                     0xfffff, 0x7ffff, 0x3ffff, 0x1ffff, 0xffff,
                     0x7fff, 0x3fff, 0x1fff, 0xfff, 0x7ff,
                     0x3ff, 0x1ff, 0xff, 0x7f, 0x3f,
                     0x1f, 0xf, 0x7, 0x3, 0x1},
          new int[] {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
                     0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
                     0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1});
      break;

    case 32:
      // Full-width values were stored verbatim: copy the 256 ints back.
      System.arraycopy(in, inpos, out, outpos, 256);
      break;
    }
  }
outpos, 256); + break; + } + } +} diff --git a/src/main/java/me/lemire/integercompression/vector/VectorFastPFOR.java b/src/main/java/me/lemire/integercompression/vector/VectorFastPFOR.java new file mode 100644 index 0000000..7374fa5 --- /dev/null +++ b/src/main/java/me/lemire/integercompression/vector/VectorFastPFOR.java @@ -0,0 +1,366 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + * (c) Intel Corp. (for Vector implementation) + */ +package me.lemire.integercompression.vector; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import me.lemire.integercompression.IntegerCODEC; +import me.lemire.integercompression.SkippableIntegerCODEC; +import me.lemire.integercompression.IntWrapper; + +/** + * This is a patching scheme designed for speed. + * It encodes integers in blocks of integers within pages of + * up to 65536 integers. Note that it is important, to get good + * compression and good performance, to use sizeable arrays (greater than 1024 + * integers). For arrays containing a number of integers that is not divisible + * by BLOCK_SIZE, you should use it in conjunction with another CODEC: + * + * IntegerCODEC ic = new Composition(new VectorFastPFOR(), new VariableByte()). + *

+ * For details, please see: + *

+ * Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second + * through vectorization Software: Practice & Experience + * http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract + * http://arxiv.org/abs/1209.2137 + *

+ *

For sufficiently compressible and long arrays, it is faster and better + * than other PFOR schemes.

+ * + * Note that this does not use differential coding: if you are working on sorted + * lists, you should first compute deltas, @see + * me.lemire.integercompression.differential.Delta#delta. + * + * For multi-threaded applications, each thread should use its own FastPFOR + * object. + * + * @author Daniel Lemire + */ +public class VectorFastPFOR implements IntegerCODEC, SkippableIntegerCODEC { + private final static int OVERHEAD_OF_EACH_EXCEPT = 8; + public final static int DEFAULT_PAGE_SIZE = 64 << 10; + + public final static int BLOCK_SIZE = 256; + private final static int INTS_PER_BLOCK = BLOCK_SIZE >>> 5; + + private final int pageSize; + private final int[][] dataTobePacked = new int[33][]; + private int[] exceptData = null; + + // Working area for compress and uncompress. + private final int[] dataPointers = new int[33]; + private final int[] freqs = new int[33]; + private final byte[] bem; + /** + * Construct the FastPFOR CODEC. + * + * @param pagesize + * the desired page size (recommended value is + * FastPFOR.DEFAULT_PAGE_SIZE) + */ + private VectorFastPFOR(int pagesize) { + pageSize = pagesize; + // Initiate arrrays. + bem = new byte[3 * pageSize / BLOCK_SIZE + pagesize]; + for (int k = 1; k < dataTobePacked.length; ++k) + dataTobePacked[k] = new int[pageSize / 32 * 4]; // heuristic + exceptData = new int[pageSize * 4]; + } + + /** + * Construct the fastPFOR CODEC with default parameters. + */ + public VectorFastPFOR() { this(DEFAULT_PAGE_SIZE); } + + /** + * Compress data in blocks of BLOCK_SIZE integers (if fewer than BLOCK_SIZE + * integers are provided, nothing is done). + * + * @see IntegerCODEC#compress(int[], IntWrapper, int, int[], IntWrapper) + */ + @Override + public void headlessCompress(int[] in, IntWrapper inpos, int inlength, + int[] out, IntWrapper outpos) { + inlength = inlength - inlength % BLOCK_SIZE; + // Allocate memory for working area. 
+ + final int finalinpos = inpos.get() + inlength; + while (inpos.get() != finalinpos) { + int thissize = Math.min(pageSize, finalinpos - inpos.get()); + encodePage(in, inpos, thissize, out, outpos); + } + } + + private void getBestBitSize(int[] in, int pos, int index) { + Arrays.fill(freqs, 0); + for (int i = pos, limit = pos + BLOCK_SIZE; i < limit; i++) { + freqs[32 - Integer.numberOfLeadingZeros(in[i])]++; + } + bem[index] = 32; + while (freqs[bem[index]] == 0) + bem[index]--; + bem[index + 2] = bem[index]; + int maxb = bem[index + 2]; + int bestcost = bem[index] * BLOCK_SIZE; + int cexcept = 0; + bem[index + 1] = 0; + for (int b = bem[index] - 1; b >= 0; --b) { + cexcept += freqs[b + 1]; + if (cexcept == BLOCK_SIZE) + break; + // the extra 8 is the cost of storing maxbits + int thiscost = cexcept * OVERHEAD_OF_EACH_EXCEPT + cexcept * (maxb - b) + + b * BLOCK_SIZE + 8; + if (maxb - b == 1) + thiscost -= cexcept; + if (thiscost < bestcost) { + bestcost = thiscost; + bem[index] = (byte)b; + bem[index + 1] = (byte)cexcept; + } + } + } + + private void encodePage(int[] in, IntWrapper inpos, int thissize, int[] out, + IntWrapper outpos) { + final int headerpos = outpos.get(); + outpos.increment(); + int tmpoutpos = outpos.get(); + + // Clear working area. 
+ Arrays.fill(dataPointers, 0); + Arrays.fill(bem, (byte)0); + + int tmpinpos = inpos.get(); + final int finalinpos = tmpinpos + thissize - BLOCK_SIZE; + int bindex = 0; + for (; tmpinpos <= finalinpos; tmpinpos += BLOCK_SIZE) { + getBestBitSize(in, tmpinpos, bindex); + final int tmpexcept = bem[bindex + 1] & 0xFF; + final int tmpbestb = bem[bindex]; + if (tmpexcept > 0) { + final int index = bem[bindex + 2] - tmpbestb; + if (dataPointers[index] + tmpexcept >= dataTobePacked[index].length) { + int newsize = 2 * (dataPointers[index] + tmpexcept); + int val = newsize + BLOCK_SIZE - 1; + newsize = val - val % BLOCK_SIZE; + dataTobePacked[index] = Arrays.copyOf(dataTobePacked[index], newsize); + } + bindex += 3; + for (int k = 0; k < BLOCK_SIZE; ++k) { + if ((in[k + tmpinpos] >>> tmpbestb) != 0) { + // we have an exception + bem[bindex++] = (byte)k; + dataTobePacked[index][dataPointers[index]++] = + in[k + tmpinpos] >>> tmpbestb; + } + } + } else { + bindex += 2; + } + VectorBitPacker.fastpack(in, tmpinpos, out, tmpoutpos, tmpbestb); + tmpoutpos += INTS_PER_BLOCK * tmpbestb; + } + inpos.set(tmpinpos); + out[headerpos] = tmpoutpos - headerpos; + + int bytesize = bindex; + out[tmpoutpos++] = bytesize; + + bytesize = bytesize % 4 == 0 ? 
bytesize : (bytesize / 4) * 4 + 4; + for (int i = 0; i <= bytesize - 4; i += 4) { + out[tmpoutpos] = bem[i] & 0xFF; + out[tmpoutpos] |= (bem[i + 1] & 0xFF) << 8; + out[tmpoutpos] |= (bem[i + 2] & 0xFF) << 16; + out[tmpoutpos] |= (bem[i + 3] & 0xFF) << 24; + tmpoutpos++; + } + + int bitmap = 0; + for (int k = 2; k <= 32; ++k) { + if (dataPointers[k] != 0) + bitmap |= (1 << (k - 1)); + } + out[tmpoutpos++] = bitmap; + + for (int k = 2; k <= 32; ++k) { + if (dataPointers[k] != 0) { + out[tmpoutpos++] = dataPointers[k]; // size + int j = 0; + int n = (dataPointers[k] / BLOCK_SIZE) * BLOCK_SIZE; + for (; j < n; j += BLOCK_SIZE) { + VectorBitPacker.fastpackNoMask(dataTobePacked[k], j, out, tmpoutpos, + k); + tmpoutpos += INTS_PER_BLOCK * k; + } + int r = dataPointers[k] % BLOCK_SIZE; + if (r != 0) { + tmpoutpos = VectorBitPacker.slowpack(dataTobePacked[k], j, r, out, + tmpoutpos, k); + tmpoutpos++; + } + } + } + outpos.set(tmpoutpos); + } + + /** + * Uncompress data in blocks of integers. In this particular case, + * the inlength parameter is ignored: it is deduced from the compressed + * data. 
+ * + * @see IntegerCODEC#compress(int[], IntWrapper, int, int[], IntWrapper) + */ + @Override + public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, + int[] out, IntWrapper outpos, int mynvalue) { + mynvalue = mynvalue - mynvalue % BLOCK_SIZE; + int finalout = outpos.get() + mynvalue; + while (outpos.get() != finalout) { + int thissize = Math.min(pageSize, finalout - outpos.get()); + decodePage(in, inpos, out, outpos, thissize); + } + } + + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + throw new UnsupportedOperationException("Calculating the max compressed length is not supported yet."); + } + + private void loadMetaData(int[] in, int inexcept, int bytesize) { + // Arrays.fill(bem, (byte)0); + int len = (bytesize + 3) / 4; + int lc = 0; + for (int i = 0; i < len; i++) { + bem[lc++] = (byte)(in[inexcept + i]); + bem[lc++] = (byte)(in[inexcept + i] >>> 8); + bem[lc++] = (byte)(in[inexcept + i] >>> 16); + bem[lc++] = (byte)(in[inexcept + i] >>> 24); + } + } + + private void decodePage(int[] in, IntWrapper inpos, int[] out, + IntWrapper outpos, int thissize) { + final int initpos = inpos.get(); + final int wheremeta = in[inpos.get()]; + inpos.increment(); + int inexcept = initpos + wheremeta; + + final int bytesize = in[inexcept++]; + loadMetaData(in, inexcept, bytesize); + inexcept += (bytesize + 3) / 4; + final int bitmap = in[inexcept++]; + for (int k = 2; k <= 32; ++k) { + if ((bitmap & (1 << (k - 1))) != 0) { + int size = in[inexcept++]; + int val = size + BLOCK_SIZE - 1; + int roundedup = val - val % BLOCK_SIZE; + if (dataTobePacked[k].length < roundedup) + dataTobePacked[k] = new int[roundedup]; + if (inexcept + roundedup / 32 * k <= in.length) { + int j = 0; + int len = (size / BLOCK_SIZE) * BLOCK_SIZE; + for (; j < len; j += BLOCK_SIZE) { + VectorBitPacker.fastunpack(in, inexcept, dataTobePacked[k], j, k); + inexcept += INTS_PER_BLOCK * k; + } + int r = size % BLOCK_SIZE; + inexcept = 
VectorBitPacker.slowunpack(in, inexcept, dataTobePacked[k], + j, r, k); + } else { + int j = 0; + val = roundedup / 32 * k + BLOCK_SIZE - 1; + int[] buf = new int[val - val % BLOCK_SIZE]; + int initinexcept = inexcept; + System.arraycopy(in, inexcept, buf, 0, in.length - inexcept); + int l = (size / BLOCK_SIZE) * BLOCK_SIZE; + for (; j < l; j += BLOCK_SIZE) { + VectorBitPacker.fastunpack(buf, inexcept - initinexcept, + dataTobePacked[k], j, k); + inexcept += INTS_PER_BLOCK * k; + } + int r = size % BLOCK_SIZE; + inexcept = VectorBitPacker.slowunpack(in, inexcept, dataTobePacked[k], + j, r, k); + } + } + } + Arrays.fill(dataPointers, 0); + int tmpoutpos = outpos.get(); + int tmpinpos = inpos.get(); + int idx = 0; + for (int run = 0, run_end = thissize / BLOCK_SIZE; run < run_end; + ++run, tmpoutpos += BLOCK_SIZE) { + final int b = bem[idx]; // byteContainer.get(); + final int cexcept = bem[idx + 1] & 0xFF; // byteContainer.get() & 0xFF; + VectorBitPacker.fastunpack(in, tmpinpos, out, tmpoutpos, b); + tmpinpos += INTS_PER_BLOCK * b; + if (cexcept > 0) { + final int maxbits = bem[idx + 2]; // byteContainer.get(); + idx += 3; + final int index = maxbits - b; + if (index == 1) { + for (int k = 0; k < cexcept; ++k) { + final int pos = bem[idx++] & 0xFF; // byteContainer.get() & 0xFF; + out[pos + tmpoutpos] |= 1 << b; + } + } else { + for (int k = 0; k < cexcept; ++k) { + final int pos = bem[idx++] & 0xFF; // byteContainer.get() & 0xFF; + final int exceptvalue = + dataTobePacked[index][dataPointers[index]++]; + out[pos + tmpoutpos] |= exceptvalue << b; + } + } + } else { + idx += 2; + } + } + outpos.set(tmpoutpos); + inpos.set(inexcept); + } + + @Override + public void compress(int[] in, IntWrapper inpos, int inlength, int[] out, + IntWrapper outpos) { + inlength = inlength - inlength % BLOCK_SIZE; + if (inlength == 0) + return; + out[outpos.get()] = inlength; + outpos.increment(); + headlessCompress(in, inpos, inlength, out, outpos); + } + + @Override + public void 
uncompress(int[] in, IntWrapper inpos, int inlength, int[] out, + IntWrapper outpos) { + if (inlength == 0) + return; + final int outlength = in[inpos.get()]; + inpos.increment(); + headlessUncompress(in, inpos, inlength, out, outpos, outlength); + } + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Creates a new buffer of the requested size. + * + * In case you need a different way to allocate buffers, you can override this + * method with a custom behavior. The default implementation allocates a new + * Java direct + * {@link ByteBuffer} on each invocation. + */ + protected ByteBuffer makeBuffer(int sizeInBytes) { + return ByteBuffer.allocateDirect(sizeInBytes); + } +} diff --git a/src/main/java/me/lemire/longcompression/ByteLongCODEC.java b/src/main/java/me/lemire/longcompression/ByteLongCODEC.java new file mode 100644 index 0000000..dbc6864 --- /dev/null +++ b/src/main/java/me/lemire/longcompression/ByteLongCODEC.java @@ -0,0 +1,62 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +import me.lemire.integercompression.IntWrapper; + +/** + * Interface describing a CODEC to compress longs to bytes. + * + * @author Benoit Lacelle + * + */ +public interface ByteLongCODEC { + /** + * Compress data from an array to another array. + * + * Both inpos and outpos are modified to represent how much data was + * read and written to. If 12 longs (inlength = 12) are compressed to 3 + * bytes, then inpos will be incremented by 12 while outpos will be + * incremented by 3. We use IntWrapper to pass the values by reference. 
/**
 * This is just like LongCODEC, except that it indicates that delta coding is
 * "integrated", so that you don't need a separate step for delta coding.
 *
 * This is a marker interface: it declares no members of its own and only
 * extends {@link LongCODEC}.
 *
 * @author Benoit Lacelle
 */
public interface IntegratedLongCODEC extends LongCODEC {

}
+ * + * @author Benoit Lacelle + */ +public interface IntegratedLongCODEC extends LongCODEC { + +} diff --git a/src/main/java/me/lemire/longcompression/LongAs2IntsCodec.java b/src/main/java/me/lemire/longcompression/LongAs2IntsCodec.java new file mode 100644 index 0000000..35c1166 --- /dev/null +++ b/src/main/java/me/lemire/longcompression/LongAs2IntsCodec.java @@ -0,0 +1,189 @@ +package me.lemire.longcompression; + +import java.util.Arrays; + +import me.lemire.integercompression.BinaryPacking; +import me.lemire.integercompression.Composition; +import me.lemire.integercompression.IntCompressor; +import me.lemire.integercompression.IntWrapper; +import me.lemire.integercompression.IntegerCODEC; +import me.lemire.integercompression.VariableByte; + +/** + * A {@link LongCODEC} which split each long in a highpart (32 first bits) and a low part (32 last bits). + * + * @author Benoit Lacelle + * + */ +public class LongAs2IntsCodec implements LongCODEC { + final IntegerCODEC highPartsCodec; + final IntegerCODEC lowPartsCodec; + + public LongAs2IntsCodec(IntegerCODEC highPartsCodec, IntegerCODEC lowPartsCodec) { + this.highPartsCodec = highPartsCodec; + this.lowPartsCodec = lowPartsCodec; + } + + /** + * By default, we expect longs to be slightly above Integer.MAX_VALUE. Hence highParts to be small and positive + * integers. 
For lowParts, we rely on {@link IntCompressor} default IntegerCODEC + */ + public LongAs2IntsCodec() { + this(new VariableByte(), new Composition(new BinaryPacking(), new VariableByte())); + } + + @Override + public void compress(long[] in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos) { + if (inlength == 0) { + return; + } + + int[] highParts = new int[inlength]; + int[] lowParts = new int[inlength]; + + for (int i = 0; i < inlength; i++) { + int inPosition = inpos.get() + i; + + highParts[i] = RoaringIntPacking.high(in[inPosition]); + lowParts[i] = RoaringIntPacking.low(in[inPosition]); + } + + // TODO What would be a relevant buffer size? + int[] buffer = new int[inlength * 16]; + + int outPosition = outpos.get(); + + boolean hasLeftover; + { + // The first integer is reserved to hold the number of compressed ints + IntWrapper highPartsOutPosition = new IntWrapper(1); + + highPartsCodec.compress(highParts, new IntWrapper(), inlength, buffer, highPartsOutPosition); + + // Record the compressedHighparts length + buffer[0] = highPartsOutPosition.get() - 1; + + for (int i = 0; i < highPartsOutPosition.get() / 2; i++) { + long pack = RoaringIntPacking.pack(buffer[i * 2], buffer[i * 2 + 1]); + out[outPosition++] = pack; + } + + if (1 == highPartsOutPosition.get() % 2) { + // Shift the trailing integer as first in the buffer + hasLeftover = true; + buffer[0] = buffer[highPartsOutPosition.get() - 1]; + } else { + hasLeftover = false; + } + } + + { + // The first integer is reserved to hold the number of compressed ints + IntWrapper lowPartsOutPosition = new IntWrapper(1); + if (hasLeftover) { + // Keep the trailing int from highParts before the reserved int from lowParts compressed length + lowPartsOutPosition.set(2); + } + + lowPartsCodec.compress(lowParts, new IntWrapper(0), inlength, buffer, lowPartsOutPosition); + + // Record the compressedHighparts length + buffer[hasLeftover ? 1 : 0] = lowPartsOutPosition.get() - (hasLeftover ? 
2 : 1); + + for (int i = 0; i < lowPartsOutPosition.get() / 2; i++) { + long pack = RoaringIntPacking.pack(buffer[i * 2], buffer[i * 2 + 1]); + out[outPosition++] = pack; + } + + if (1 == lowPartsOutPosition.get() % 2) { + // The trailing integer is packed with a 0 + long pack = RoaringIntPacking.pack(buffer[lowPartsOutPosition.get() - 1], 0); + out[outPosition++] = pack; + } + } + + inpos.add(inlength); + outpos.set(outPosition); + } + + /** + * inlength is ignored by this codec. We may rely on it instead of storing the compressedLowPart length + */ + @Override + public void uncompress(long[] in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos) { + if (inlength == 0) { + return; + } + + int longIndex = inpos.get(); + + int nbCompressedHighParts = RoaringIntPacking.high(in[longIndex]); + int[] compressedHighParts = new int[nbCompressedHighParts]; + + // !highPart as we just read the highPart for nbCompressedHighParts + boolean highPart = false; + for (int i = 0; i < nbCompressedHighParts; i++) { + int nextInt; + if (highPart) { + nextInt = RoaringIntPacking.high(in[longIndex + (i + 1) / 2]); + } else { + nextInt = RoaringIntPacking.low(in[longIndex + (i + 1) / 2]); + } + compressedHighParts[i] = nextInt; + + highPart = !highPart; + } + + // TODO What would be a relevant buffer size? 
+ int[] buffer = new int[inlength * 16]; + + IntWrapper highPartsOutPosition = new IntWrapper(); + highPartsCodec.uncompress(compressedHighParts, + new IntWrapper(), + compressedHighParts.length, + buffer, + highPartsOutPosition); + int[] highParts = Arrays.copyOf(buffer, highPartsOutPosition.get()); + + // +1 as we initially read nbCompressedHighParts + int intIndexNbCompressedLowParts = longIndex * 2 + 1 + nbCompressedHighParts; + int nbCompressedLowParts; + if (highPart) { + nbCompressedLowParts = RoaringIntPacking.high(in[intIndexNbCompressedLowParts / 2]); + } else { + nbCompressedLowParts = RoaringIntPacking.low(in[intIndexNbCompressedLowParts / 2]); + } + highPart = !highPart; + + int[] compressedLowParts = new int[nbCompressedLowParts]; + for (int i = 0; i < nbCompressedLowParts; i++) { + int nextInt; + if (highPart) { + nextInt = RoaringIntPacking.high(in[(intIndexNbCompressedLowParts + 1 + i) / 2]); + } else { + nextInt = RoaringIntPacking.low(in[(intIndexNbCompressedLowParts + 1 + i) / 2]); + } + compressedLowParts[i] = nextInt; + + highPart = !highPart; + } + + IntWrapper lowPartsOutPosition = new IntWrapper(); + lowPartsCodec.uncompress(compressedLowParts, + new IntWrapper(), + compressedLowParts.length, + buffer, + lowPartsOutPosition); + int[] lowParts = Arrays.copyOf(buffer, lowPartsOutPosition.get()); + assert highParts.length == lowParts.length; + + int outposition = outpos.get(); + for (int i = 0; i < highParts.length; i++) { + out[outposition++] = RoaringIntPacking.pack(highParts[i], lowParts[i]); + } + + inpos.add(inlength); + outpos.set(outposition); + } + +} diff --git a/src/main/java/me/lemire/longcompression/LongBinaryPacking.java b/src/main/java/me/lemire/longcompression/LongBinaryPacking.java new file mode 100644 index 0000000..b6ea58f --- /dev/null +++ b/src/main/java/me/lemire/longcompression/LongBinaryPacking.java @@ -0,0 +1,153 @@ +package me.lemire.longcompression; + +import me.lemire.integercompression.BinaryPacking; +import 
me.lemire.integercompression.IntWrapper; +import me.lemire.integercompression.Util; + +/** + * Scheme based on a commonly used idea: can be extremely fast. + * It encodes integers in blocks of 64 longs. For arrays containing + * an arbitrary number of longs, you should use it in conjunction + * with another CODEC: + * + *
LongCODEC ic = 
+ *  new LongComposition(new LongBinaryPacking(), new LongVariableByte()).
+ * + * Note that this does not use differential coding: if you are working on sorted + * lists, you must compute the deltas separately. + * + *

+ * For details, please see {@link BinaryPacking} + *

+ * + * @author Benoit Lacelle + */ +public final class LongBinaryPacking implements LongCODEC, SkippableLongCODEC { + public final static int BLOCK_SIZE = 64; + private static final int MAX_BIT_WIDTH = Long.SIZE; + + @Override + public void compress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + if (inlength == 0) + return; + out[outpos.get()] = inlength; + outpos.increment(); + headlessCompress(in, inpos, inlength, out, outpos); + } + + @Override + public void headlessCompress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + int tmpoutpos = outpos.get(); + int s = inpos.get(); + // Compress by block of 8 * 64 longs as much as possible + for (; s + BLOCK_SIZE * 8 - 1 < inpos.get() + inlength; s += BLOCK_SIZE * 8) { + // maxbits can be anything between 0 and 64 included: expressed within a byte (1 << 6) + final int mbits1 = LongUtil.maxbits(in, s + 0 * BLOCK_SIZE, BLOCK_SIZE); + final int mbits2 = LongUtil.maxbits(in, s + 1 * BLOCK_SIZE, BLOCK_SIZE); + final int mbits3 = LongUtil.maxbits(in, s + 2 * BLOCK_SIZE, BLOCK_SIZE); + final int mbits4 = LongUtil.maxbits(in, s + 3 * BLOCK_SIZE, BLOCK_SIZE); + final int mbits5 = LongUtil.maxbits(in, s + 4 * BLOCK_SIZE, BLOCK_SIZE); + final int mbits6 = LongUtil.maxbits(in, s + 5 * BLOCK_SIZE, BLOCK_SIZE); + final int mbits7 = LongUtil.maxbits(in, s + 6 * BLOCK_SIZE, BLOCK_SIZE); + final int mbits8 = LongUtil.maxbits(in, s + 7 * BLOCK_SIZE, BLOCK_SIZE); + // The first long expressed the maxbits for the 8 buckets + out[tmpoutpos++] = ((long) mbits1 << 56) | ((long) mbits2 << 48) | ((long) mbits3 << 40) | ((long) mbits4 << 32) | (mbits5 << 24) | (mbits6 << 16) | (mbits7 << 8) | (mbits8); + LongBitPacking.fastpackwithoutmask(in, s + 0 * BLOCK_SIZE, out, tmpoutpos, (int) mbits1); + tmpoutpos += mbits1; + LongBitPacking.fastpackwithoutmask(in, s + 1 * 
BLOCK_SIZE, out, tmpoutpos, (int) mbits2); + tmpoutpos += mbits2; + LongBitPacking.fastpackwithoutmask(in, s + 2 * BLOCK_SIZE, out, tmpoutpos, (int) mbits3); + tmpoutpos += mbits3; + LongBitPacking.fastpackwithoutmask(in, s + 3 * BLOCK_SIZE, out, tmpoutpos, (int) mbits4); + tmpoutpos += mbits4; + LongBitPacking.fastpackwithoutmask(in, s + 4 * BLOCK_SIZE, out, tmpoutpos, (int) mbits5); + tmpoutpos += mbits5; + LongBitPacking.fastpackwithoutmask(in, s + 5 * BLOCK_SIZE, out, tmpoutpos, (int) mbits6); + tmpoutpos += mbits6; + LongBitPacking.fastpackwithoutmask(in, s + 6 * BLOCK_SIZE, out, tmpoutpos, (int) mbits7); + tmpoutpos += mbits7; + LongBitPacking.fastpackwithoutmask(in, s + 7 * BLOCK_SIZE, out, tmpoutpos, (int) mbits8); + tmpoutpos += mbits8; + } + // Then we compress up to 7 blocks of 64 longs + for (; s < inpos.get() + inlength; s += BLOCK_SIZE ) { + final int mbits = LongUtil.maxbits(in, s, BLOCK_SIZE); + out[tmpoutpos++] = mbits; + LongBitPacking.fastpackwithoutmask(in, s, out, tmpoutpos, mbits); + tmpoutpos += mbits; + } + inpos.add(inlength); + outpos.set(tmpoutpos); + } + + @Override + public void uncompress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos) { + if (inlength == 0) + return; + final int outlength = (int) in[inpos.get()]; + inpos.increment(); + headlessUncompress(in,inpos, inlength,out,outpos,outlength); + } + + @Override + public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos, int num) { + final int outlength = Util.greatestMultiple(num, BLOCK_SIZE); + int tmpinpos = inpos.get(); + int s = outpos.get(); + for (; s + BLOCK_SIZE * 8 - 1 < outpos.get() + outlength; s += BLOCK_SIZE * 8) { + final int mbits1 = (int) ((in[tmpinpos] >>> 56)); + final int mbits2 = (int) ((in[tmpinpos] >>> 48) & 0xFF); + final int mbits3 = (int) ((in[tmpinpos] >>> 40) & 0xFF); + final int mbits4 = (int) ((in[tmpinpos] >>> 32) & 0xFF); + final int mbits5 = (int) ((in[tmpinpos] >>> 24) & 
0xFF); + final int mbits6 = (int) ((in[tmpinpos] >>> 16) & 0xFF); + final int mbits7 = (int) ((in[tmpinpos] >>> 8) & 0xFF); + final int mbits8 = (int) ((in[tmpinpos]) & 0xFF); + ++tmpinpos; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 0 * BLOCK_SIZE, mbits1); + tmpinpos += mbits1; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 1 * BLOCK_SIZE, mbits2); + tmpinpos += mbits2; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 2 * BLOCK_SIZE, mbits3); + tmpinpos += mbits3; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 3 * BLOCK_SIZE, mbits4); + tmpinpos += mbits4; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 4 * BLOCK_SIZE, mbits5); + tmpinpos += mbits5; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 5 * BLOCK_SIZE, mbits6); + tmpinpos += mbits6; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 6 * BLOCK_SIZE, mbits7); + tmpinpos += mbits7; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 7 * BLOCK_SIZE, mbits8); + tmpinpos += mbits8; + } + for (; s < outpos.get() + outlength; s += BLOCK_SIZE ) { + final int mbits = (int) in[tmpinpos]; + ++tmpinpos; + LongBitPacking.fastunpack(in, tmpinpos, out, s, mbits); + tmpinpos += mbits; + } + outpos.add(outlength); + inpos.set(tmpinpos); + } + + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + int blockCount = inlength / BLOCK_SIZE; + int headersSizeInLongs = blockCount / Long.BYTES + (blockCount % Long.BYTES); + int blocksSizeInLongs = blockCount * MAX_BIT_WIDTH; + compressedPositions.add(blockCount * BLOCK_SIZE); + return headersSizeInLongs + blocksSizeInLongs; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} diff --git a/src/main/java/me/lemire/longcompression/LongBitPacking.java b/src/main/java/me/lemire/longcompression/LongBitPacking.java new file mode 100644 index 0000000..2d282ec --- /dev/null +++ b/src/main/java/me/lemire/longcompression/LongBitPacking.java @@ -0,0 +1,146 @@ +/** + * This code 
is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +import java.util.Arrays; + +/** + * Bitpacking routines + * + *

For details, please see

+ *

+ * Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second + * through vectorization Software: Practice & Experience + * http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract + * http://arxiv.org/abs/1209.2137 + *

+ * + * @author Benoit Lacelle + * + */ +public final class LongBitPacking { + + /** + * Pack 64 longs + * + * @param in + * source array + * @param inpos + * position in source array + * @param out + * output array + * @param outpos + * position in output array + * @param bit + * number of bits to use per long + */ + public static void fastpackwithoutmask(final long[] in, final int inpos, + final long[] out, final int outpos, final int bit) { + if (bit == 0) { + fastpackwithoutmask0(in, inpos, out, outpos); + } else if (bit == 64) { + fastpackwithoutmask64(in, inpos, out, outpos); + } else if (bit > 0 && bit < 64) { + slowpackwithoutmask(in, inpos, out, outpos, bit); + } else { + throw new IllegalArgumentException("Unsupported bit width: " + bit); + } + } + + protected static void fastpackwithoutmask0(final long[] in, int inpos, + final long[] out, int outpos) { + // nothing + } + + protected static void fastpackwithoutmask64(final long[] in, int inpos, + final long[] out, int outpos) { + System.arraycopy(in, inpos, out, outpos, 64); + } + + protected static void slowpackwithoutmask(final long[] in, int inpos, + final long[] out, int outpos, final int bit) { + int bucket = 0; + int shift = 0; + + out[outpos + bucket] = 0L; + for (int i = 0 ; i < 64 ; i++) { + if (shift >= 64) { + bucket++; + out[bucket + outpos] = 0L; + shift -= 64; + + if (shift > 0) { + // There is some leftovers from previous input in the next bucket + out[outpos + bucket] |= in[inpos + i - 1] >> (bit - shift); + } + } + out[outpos + bucket] |= in[inpos + i] << shift; + + shift += bit; + } + } + + + /** + * Unpack the 64 longs + * + * @param in + * source array + * @param inpos + * starting point in the source array + * @param out + * output array + * @param outpos + * starting point in the output array + * @param bit + * how many bits to use per integer + */ + public static void fastunpack(final long[] in, final int inpos, + final long[] out, final int outpos, final int bit) { + if (bit == 0) 
{ + fastunpack0(in, inpos, out, outpos); + } else if (bit == 64) { + fastunpack64(in, inpos, out, outpos); + } else if (bit > 0 && bit < 64) { + slowunpack(in, inpos, out, outpos, bit); + } else { + throw new IllegalArgumentException("Unsupported bit width: " + bit); + } + } + + + protected static void fastunpack0(final long[] in, int inpos, + final long[] out, int outpos) { + Arrays.fill(out, outpos, outpos + 64, 0); + } + + protected static void fastunpack64(final long[] in, int inpos, + final long[] out, int outpos) { + System.arraycopy(in, inpos, out, outpos, 64); + } + + protected static void slowunpack(final long[] in, int inpos, + final long[] out, int outpos, final int bit) { + int bucket = 0; + int shift = 0; + for (int i = 0 ; i < 64 ; i++) { + if (shift >= 64) { + bucket++; + shift -= 64; + + if (shift > 0) { + // There is some leftovers from previous input in the next bucket + out[outpos + i - 1] |= (in[inpos + bucket] << (bit - shift) & ((1L << bit) - 1)); + } + } + out[outpos + i] = ((in[inpos + bucket] >>> shift) & ((1L << bit) - 1)); + + shift += bit; + } + } +} diff --git a/src/main/java/me/lemire/longcompression/LongCODEC.java b/src/main/java/me/lemire/longcompression/LongCODEC.java new file mode 100644 index 0000000..0951ffd --- /dev/null +++ b/src/main/java/me/lemire/longcompression/LongCODEC.java @@ -0,0 +1,62 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +import me.lemire.integercompression.IntWrapper; + +/** + * Interface describing a standard CODEC to compress longs. + * + * @author Benoit Lacelle + * + */ +public interface LongCODEC { + /** + * Compress data from an array to another array. + * + * Both inpos and outpos are modified to represent how much data was + * read and written to. 
If 12 longs (inlength = 12) are compressed to 3 + * longs, then inpos will be incremented by 12 while outpos will be + * incremented by 3. We use IntWrapper to pass the values by reference. + * + * @param in + * input array + * @param inpos + * where to start reading in the array + * @param inlength + * how many longs to compress + * @param out + * output array + * @param outpos + * where to write in the output array + */ + public void compress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos); + + /** + * Uncompress data from an array to another array. + * + * Both inpos and outpos parameters are modified to indicate new + * positions after read/write. + * + * @param in + * array containing data in compressed form + * @param inpos + * where to start reading in the array + * @param inlength + * length of the compressed data (ignored by some + * schemes) + * @param out + * array where to write the uncompressed output + * @param outpos + * where to start writing the uncompressed output in out + */ + public void uncompress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos); + +} diff --git a/src/main/java/me/lemire/longcompression/LongComposition.java b/src/main/java/me/lemire/longcompression/LongComposition.java new file mode 100644 index 0000000..5111a51 --- /dev/null +++ b/src/main/java/me/lemire/longcompression/LongComposition.java @@ -0,0 +1,71 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ +package me.lemire.longcompression; + +import me.lemire.integercompression.IntWrapper; + +/** + * Helper class to compose schemes. + * + * @author Benoit Lacelle + */ +public class LongComposition implements LongCODEC { + LongCODEC F1, F2; + + /** + * Compose a scheme from a first one (f1) and a second one (f2). 
The + * first one is called first and then the second one tries to compress + * whatever remains from the first run. + * + * By convention, the first scheme should be such that if, during + * decoding, a 32-bit zero is first encountered, then there is no + * output. + * + * @param f1 + * first codec + * @param f2 + * second codec + */ + public LongComposition(LongCODEC f1, LongCODEC f2) { + F1 = f1; + F2 = f2; + } + + @Override + public void compress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos) { + if (inlength == 0) { + return; + } + int inposInit = inpos.get(); + int outposInit = outpos.get(); + F1.compress(in, inpos, inlength, out, outpos); + if (outpos.get() == outposInit) { + out[outposInit] = 0; + outpos.increment(); + } + inlength -= inpos.get() - inposInit; + F2.compress(in, inpos, inlength, out, outpos); + } + + @Override + public void uncompress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos) { + if (inlength == 0) + return; + final int init = inpos.get(); + F1.uncompress(in, inpos, inlength, out, outpos); + inlength -= inpos.get() - init; + F2.uncompress(in, inpos, inlength, out, outpos); + } + + @Override + public String toString() { + return F1.toString() + " + " + F2.toString(); + } + +} diff --git a/src/main/java/me/lemire/longcompression/LongCompressor.java b/src/main/java/me/lemire/longcompression/LongCompressor.java new file mode 100644 index 0000000..246647f --- /dev/null +++ b/src/main/java/me/lemire/longcompression/LongCompressor.java @@ -0,0 +1,68 @@ +package me.lemire.longcompression; + +import java.util.Arrays; + +import me.lemire.integercompression.IntWrapper; + +/** + * This is a convenience class that wraps a codec to provide + * a "friendly" API. + * + * @author Benoit Lacelle + */ +public class LongCompressor { + + SkippableLongCODEC codec; + + /** + * Constructor wrapping a codec. 
+ * + * @param c the underlying codec + */ + public LongCompressor(SkippableLongCODEC c) { + codec = c; + } + + /** + * Constructor with default codec. + */ + public LongCompressor() { + codec = new SkippableLongComposition(new LongBinaryPacking(), + new LongVariableByte()); + } + + /** + * Compress an array and returns the compressed result as a new array. + * + * @param input array to be compressed + * @return compressed array + */ + public long[] compress(long[] input) { + int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), input.length); + long[] compressed = new long[maxCompressedLength + 1]; // +1 to store the length of the input + // Store at index=0 the length of the input, hence enabling .headlessCompress + compressed[0] = input.length; + IntWrapper outpos = new IntWrapper(1); + codec.headlessCompress(input, new IntWrapper(0), input.length, compressed, outpos); + compressed = Arrays.copyOf(compressed,outpos.intValue()); + return compressed; + } + + /** + * Uncompress an array and returns the uncompressed result as a new array. + * + * @param compressed compressed array + * @return uncompressed array + */ + public long[] uncompress(long[] compressed) { + // Read at index=0 the length of the input, hence enabling .headlessUncompress + long[] decompressed = new long[(int) compressed[0]]; + IntWrapper inpos = new IntWrapper(1); + codec.headlessUncompress(compressed, inpos, + compressed.length - inpos.intValue(), + decompressed, new IntWrapper(0), + decompressed.length); + return decompressed; + } + +} diff --git a/src/main/java/me/lemire/longcompression/LongJustCopy.java b/src/main/java/me/lemire/longcompression/LongJustCopy.java new file mode 100644 index 0000000..95abc1e --- /dev/null +++ b/src/main/java/me/lemire/longcompression/LongJustCopy.java @@ -0,0 +1,58 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +import me.lemire.integercompression.IntWrapper; + +/** + * @author Benoit lacelle + * + */ +public final class LongJustCopy implements LongCODEC, SkippableLongCODEC { + + @Override + public void headlessCompress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos) { + System.arraycopy(in, inpos.get(), out, outpos.get(), inlength); + inpos.add(inlength); + outpos.add(inlength); + } + + @Override + public void uncompress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos) { + headlessUncompress(in,inpos,inlength,out,outpos,inlength); + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + @Override + public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos, int num) { + System.arraycopy(in, inpos.get(), out, outpos.get(), num); + inpos.add(num); + outpos.add(num); + + } + + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + compressedPositions.add(inlength); + return inlength; + } + + @Override + public void compress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos) { + headlessCompress(in,inpos,inlength,out,outpos); + } + +} diff --git a/src/main/java/me/lemire/longcompression/LongUtil.java b/src/main/java/me/lemire/longcompression/LongUtil.java new file mode 100644 index 0000000..7bdce83 --- /dev/null +++ b/src/main/java/me/lemire/longcompression/LongUtil.java @@ -0,0 +1,52 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +/** + * These are unofficial helpers related to long compression + * + * @author Benoit Lacelle + * + */ +@Deprecated +public class LongUtil { + + /** + * Compute the maximum of the integer logarithms (ceil(log(x+1)) of a range + * of value + * + * @param i + * source array + * @param pos + * starting position + * @param length + * number of integers to consider + * @return integer logarithm + */ + public static int maxbits(long[] i, int pos, int length) { + long mask = 0; + for (int k = pos; k < pos + length; ++k) + mask |= i[k]; + return bits(mask); + } + + /** + * Compute the integer logarithms (ceil(log(x+1)) of a value + * + * @param i + * source value + * @return integer logarithm + */ + public static int bits(long i) { + return 64 - Long.numberOfLeadingZeros(i); + } + + protected static String longToBinaryWithLeading(long l) { + return String.format("%64s", Long.toBinaryString(l)).replace(' ', '0'); + } +} diff --git a/src/main/java/me/lemire/longcompression/LongVariableByte.java b/src/main/java/me/lemire/longcompression/LongVariableByte.java new file mode 100644 index 0000000..63c194b --- /dev/null +++ b/src/main/java/me/lemire/longcompression/LongVariableByte.java @@ -0,0 +1,348 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ +package me.lemire.longcompression; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.LongBuffer; + +import me.lemire.integercompression.IntWrapper; + +/** + * Implementation of variable-byte. For best performance, use it using the + * ByteLongCODEC interface. + * + * Note that this does not use differential coding: if you are working on sorted + * lists, you must compute the deltas separately. 
+ * + * @author Benoit Lacelle + */ +public class LongVariableByte implements LongCODEC, ByteLongCODEC, SkippableLongCODEC { + private static final int MAX_BYTES_PER_INT = 10; + + private static byte extract7bits(int i, long val) { + return (byte) ((val >>> (7 * i)) & ((1 << 7) - 1)); + } + + private static byte extract7bitsmaskless(int i, long val) { + return (byte) ((val >>> (7 * i))); + } + @Override + public void compress(long[] in, IntWrapper inpos, int inlength, long[] out, + IntWrapper outpos) { + headlessCompress(in, inpos, inlength, out, outpos); + } + + @Override + public void headlessCompress(long[] in, IntWrapper inpos, int inlength, long[] out, + IntWrapper outpos) { + if (inlength == 0) + return; + // Worst case: we write 10 bytes per long, hence 2 longs for a long, hence 16 bytes per long + ByteBuffer buf = makeBuffer(inlength * 16); + buf.order(ByteOrder.LITTLE_ENDIAN); + for (int k = inpos.get(); k < inpos.get() + inlength; ++k) { + final long val = in[k]; + if (val >= 0 && val < (1 << 7)) { + buf.put((byte) (val | (1 << 7))); + } else if (val >= 0 && val < (1 << 14)) { + buf.put((byte) extract7bits(0, val)); + buf.put((byte) (extract7bitsmaskless(1, (val)) | (1 << 7))); + } else if (val >= 0 && val < (1 << 21)) { + buf.put((byte) extract7bits(0, val)); + buf.put((byte) extract7bits(1, val)); + buf.put((byte) (extract7bitsmaskless(2, (val)) | (1 << 7))); + } else if (val >= 0 && val < (1 << 28)) { + buf.put((byte) extract7bits(0, val)); + buf.put((byte) extract7bits(1, val)); + buf.put((byte) extract7bits(2, val)); + buf.put((byte) (extract7bitsmaskless(3, (val)) | (1 << 7))); + } else if (val >= 0 && val < (1L << 35)) { + buf.put((byte) extract7bits(0, val)); + buf.put((byte) extract7bits(1, val)); + buf.put((byte) extract7bits(2, val)); + buf.put((byte) extract7bits(3, val)); + buf.put((byte) (extract7bitsmaskless(4, (val)) | (1 << 7))); + } else if (val >= 0 && val < (1L << 42)) { + buf.put((byte) extract7bits(0, val)); + buf.put((byte) 
extract7bits(1, val)); + buf.put((byte) extract7bits(2, val)); + buf.put((byte) extract7bits(3, val)); + buf.put((byte) extract7bits(4, val)); + buf.put((byte) (extract7bitsmaskless(5, (val)) | (1 << 7))); + } else if (val >= 0 && val < (1L << 49)) { + buf.put((byte) extract7bits(0, val)); + buf.put((byte) extract7bits(1, val)); + buf.put((byte) extract7bits(2, val)); + buf.put((byte) extract7bits(3, val)); + buf.put((byte) extract7bits(4, val)); + buf.put((byte) extract7bits(5, val)); + buf.put((byte) (extract7bitsmaskless(6, (val)) | (1 << 7))); + } else if (val >= 0 && val < (1L << 56)) { + buf.put((byte) extract7bits(0, val)); + buf.put((byte) extract7bits(1, val)); + buf.put((byte) extract7bits(2, val)); + buf.put((byte) extract7bits(3, val)); + buf.put((byte) extract7bits(4, val)); + buf.put((byte) extract7bits(5, val)); + buf.put((byte) extract7bits(6, val)); + buf.put((byte) (extract7bitsmaskless(7, (val)) | (1 << 7))); + } else if (val >= 0) { + buf.put((byte) extract7bits(0, val)); + buf.put((byte) extract7bits(1, val)); + buf.put((byte) extract7bits(2, val)); + buf.put((byte) extract7bits(3, val)); + buf.put((byte) extract7bits(4, val)); + buf.put((byte) extract7bits(5, val)); + buf.put((byte) extract7bits(6, val)); + buf.put((byte) extract7bits(7, val)); + buf.put((byte) (extract7bitsmaskless(8, (val)) | (1 << 7))); + } else { + buf.put((byte) extract7bits(0, val)); + buf.put((byte) extract7bits(1, val)); + buf.put((byte) extract7bits(2, val)); + buf.put((byte) extract7bits(3, val)); + buf.put((byte) extract7bits(4, val)); + buf.put((byte) extract7bits(5, val)); + buf.put((byte) extract7bits(6, val)); + buf.put((byte) extract7bits(7, val)); + buf.put((byte) extract7bits(8, val)); + buf.put((byte) (extract7bitsmaskless(9, (val)) | (1 << 7))); + } + } + while (buf.position() % 8 != 0) + buf.put((byte) 0); + final int length = buf.position(); + buf.flip(); + LongBuffer ibuf = buf.asLongBuffer(); + ibuf.get(out, outpos.get(), length / 8); + 
outpos.add(length / 8); + inpos.add(inlength); + } + + @Override + public void compress(long[] in, IntWrapper inpos, int inlength, byte[] out, + IntWrapper outpos) { + if (inlength == 0) + return; + int outpostmp = outpos.get(); + for (int k = inpos.get(); k < inpos.get() + inlength; ++k) { + final long val = in[k]; + if (val >= 0 && val < (1 << 7)) { + out[outpostmp++] = (byte) (val | (1 << 7)); + } else if (val >= 0 && val < (1 << 14)) { + out[outpostmp++] = (byte) extract7bits(0, val); + out[outpostmp++] = (byte) (extract7bitsmaskless(1, (val)) | (1 << 7)); + } else if (val >= 0 && val < (1 << 21)) { + out[outpostmp++] = (byte) extract7bits(0, val); + out[outpostmp++] = (byte) extract7bits(1, val); + out[outpostmp++] = (byte) (extract7bitsmaskless(2, (val)) | (1 << 7)); + } else if (val >= 0 && val < (1 << 28)) { + out[outpostmp++] = (byte) extract7bits(0, val); + out[outpostmp++] = (byte) extract7bits(1, val); + out[outpostmp++] = (byte) extract7bits(2, val); + out[outpostmp++] = (byte) (extract7bitsmaskless(3, (val)) | (1 << 7)); + } else if (val >= 0 && val < (1L << 35)) { + out[outpostmp++] = (byte) extract7bits(0, val); + out[outpostmp++] = (byte) extract7bits(1, val); + out[outpostmp++] = (byte) extract7bits(2, val); + out[outpostmp++] = (byte) extract7bits(3, val); + out[outpostmp++] = (byte) (extract7bitsmaskless(4, (val)) | (1 << 7)); + } else if (val >= 0 && val < (1L << 42)) { + out[outpostmp++] = (byte) extract7bits(0, val); + out[outpostmp++] = (byte) extract7bits(1, val); + out[outpostmp++] = (byte) extract7bits(2, val); + out[outpostmp++] = (byte) extract7bits(3, val); + out[outpostmp++] = (byte) extract7bits(4, val); + out[outpostmp++] = (byte) (extract7bitsmaskless(5, (val)) | (1 << 7)); + } else if (val >= 0 && val < (1L << 49)) { + out[outpostmp++] = (byte) extract7bits(0, val); + out[outpostmp++] = (byte) extract7bits(1, val); + out[outpostmp++] = (byte) extract7bits(2, val); + out[outpostmp++] = (byte) extract7bits(3, val); + 
out[outpostmp++] = (byte) extract7bits(4, val); + out[outpostmp++] = (byte) extract7bits(5, val); + out[outpostmp++] = (byte) (extract7bitsmaskless(6, (val)) | (1 << 7)); + } else if (val >= 0 && val < (1L << 56)) { + out[outpostmp++] = (byte) extract7bits(0, val); + out[outpostmp++] = (byte) extract7bits(1, val); + out[outpostmp++] = (byte) extract7bits(2, val); + out[outpostmp++] = (byte) extract7bits(3, val); + out[outpostmp++] = (byte) extract7bits(4, val); + out[outpostmp++] = (byte) extract7bits(5, val); + out[outpostmp++] = (byte) extract7bits(6, val); + out[outpostmp++] = (byte) (extract7bitsmaskless(7, (val)) | (1 << 7)); + } else if (val >= 0) { + out[outpostmp++] = (byte) extract7bits(0, val); + out[outpostmp++] = (byte) extract7bits(1, val); + out[outpostmp++] = (byte) extract7bits(2, val); + out[outpostmp++] = (byte) extract7bits(3, val); + out[outpostmp++] = (byte) extract7bits(4, val); + out[outpostmp++] = (byte) extract7bits(5, val); + out[outpostmp++] = (byte) extract7bits(6, val); + out[outpostmp++] = (byte) extract7bits(7, val); + out[outpostmp++] = (byte) (extract7bitsmaskless(8, (val)) | (1 << 7)); + } else { + out[outpostmp++] = (byte) extract7bits(0, val); + out[outpostmp++] = (byte) extract7bits(1, val); + out[outpostmp++] = (byte) extract7bits(2, val); + out[outpostmp++] = (byte) extract7bits(3, val); + out[outpostmp++] = (byte) extract7bits(4, val); + out[outpostmp++] = (byte) extract7bits(5, val); + out[outpostmp++] = (byte) extract7bits(6, val); + out[outpostmp++] = (byte) extract7bits(7, val); + out[outpostmp++] = (byte) extract7bits(8, val); + out[outpostmp++] = (byte) (extract7bitsmaskless(9, (val)) | (1 << 7)); + } + } + outpos.set(outpostmp); + inpos.add(inlength); + } + + @Override + public void uncompress(long[] in, IntWrapper inpos, int inlength, long[] out, + IntWrapper outpos) { + int s = 0; + long val = 0; + int p = inpos.get(); + int finalp = inpos.get() + inlength; + int tmpoutpos = outpos.get(); + for (long v = 0, shift = 
0; p < finalp;) { + val = in[p]; + long c = (byte) (val >>> s); + // Shift to next byte + s += 8; + // Shift to next long if s==64 + p += s>>6; + // Cycle from 63 to 0 + s = s & 63; + v += ((c & 127) << shift); + if ((c & 128) == 128) { + out[tmpoutpos++] = v; + v = 0; + shift = 0; + } else + shift += 7; + assert shift < 64; + } + outpos.set(tmpoutpos); + inpos.add(inlength); + } + + @Override + public void uncompress(byte[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos) { + int p = inpos.get(); + int finalp = inpos.get() + inlength; + int tmpoutpos = outpos.get(); + for (long v = 0; p < finalp; out[tmpoutpos++] = v) { + v = in[p] & 0x7F; + if (in[p] < 0) { + p += 1; + continue; + } + v = ((in[p + 1] & 0x7F) << 7) | v; + if (in[p + 1] < 0) { + p += 2; + continue; + } + v = ((in[p + 2] & 0x7F) << 14) | v; + if (in[p + 2] < 0 ) { + p += 3; + continue; + } + v = ((in[p + 3] & 0x7F) << 21) | v; + if (in[p + 3] < 0) { + p += 4; + continue; + } + v = (((long) in[p + 4] & 0x7F) << 28) | v; + if (in[p + 4] < 0) { + p += 5; + continue; + } + v = (((long) in[p + 5] & 0x7F) << 35) | v; + if (in[p + 5] < 0) { + p += 6; + continue; + } + v = (((long) in[p + 6] & 0x7F) << 42) | v; + if (in[p + 6] < 0) { + p += 7; + continue; + } + v = (((long) in[p + 7] & 0x7F) << 49) | v; + if (in[p + 7] < 0) { + p += 8; + continue; + } + v = (((long) in[p + 8] & 0x7F) << 56) | v; + if (in[p + 8] < 0) { + p += 9; + continue; + } + v = (((long) in[p + 9] & 0x7F) << 63) | v; + p += 10; + } + outpos.set(tmpoutpos); + inpos.add(p); + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + @Override + public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, long[] out, + IntWrapper outpos, int num) { + int s = 0; + long val = 0; + int p = inpos.get(); + int tmpoutpos = outpos.get(); + int finaloutpos = num + tmpoutpos; + for (long v = 0, shift = 0; tmpoutpos < finaloutpos;) { + val = in[p]; + long c = val >>> s; + // Shift 
to next byte + s += 8; + // Shift to next long if s == 64 + p += s>>6; + // Cycle from 63 to 0 + s = s & 63; + v += ((c & 127) << shift); + if ((c & 128) == 128) { + out[tmpoutpos++] = v; + v = 0; + shift = 0; + } else + shift += 7; + assert shift < 64; + } + outpos.set(tmpoutpos); + inpos.set(p + (s!=0 ? 1 : 0)); + } + + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + int maxLengthInBytes = inlength * MAX_BYTES_PER_INT; + int maxLengthInLongs = (maxLengthInBytes + Long.BYTES - 1) / Long.BYTES; + compressedPositions.add(inlength); + return maxLengthInLongs; + } + + /** + * Creates a new buffer of the requested size. + * + * In case you need a different way to allocate buffers, you can override this method + * with a custom behavior. The default implementation allocates a new Java direct + * {@link ByteBuffer} on each invocation. + */ + protected ByteBuffer makeBuffer(int sizeInBytes) { + return ByteBuffer.allocateDirect(sizeInBytes); + } +} diff --git a/src/main/java/me/lemire/longcompression/RoaringIntPacking.java b/src/main/java/me/lemire/longcompression/RoaringIntPacking.java new file mode 100644 index 0000000..d6b6baa --- /dev/null +++ b/src/main/java/me/lemire/longcompression/RoaringIntPacking.java @@ -0,0 +1,46 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ +package me.lemire.longcompression; + +/** + * Used to hold the logic packing 2 integers in a long, and separating a long in two integers. 
It is + * useful in {@link Roaring64NavigableMap} as the implementation split the input long in two + * integers, one used as key of a NavigableMap while the other is added in a Bitmap + * + * @author Benoit Lacelle + * + */ +// Duplicated from RoaringBitmap +class RoaringIntPacking { + + /** + * + * @param id any long, positive or negative + * @return an int holding the 32 highest order bits of information of the input long + */ + public static int high(long id) { + return (int) (id >> 32); + } + + /** + * + * @param id any long, positive or negative + * @return an int holding the 32 lowest order bits of information of the input long + */ + public static int low(long id) { + return (int) id; + } + + /** + * + * @param high an integer representing the highest order bits of the output long + * @param low an integer representing the lowest order bits of the output long + * @return a long packing together the integers as computed by + * {@link RoaringIntPacking#high(long)} and {@link RoaringIntPacking#low(long)} + */ + // https://stackoverflow.com/questions/12772939/java-storing-two-ints-in-a-long + public static long pack(int high, int low) { + return (((long) high) << 32) | (low & 0xffffffffL); + } +} diff --git a/src/main/java/me/lemire/longcompression/SkippableLongCODEC.java b/src/main/java/me/lemire/longcompression/SkippableLongCODEC.java new file mode 100644 index 0000000..33fd562 --- /dev/null +++ b/src/main/java/me/lemire/longcompression/SkippableLongCODEC.java @@ -0,0 +1,87 @@ +/** + * This is code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +import me.lemire.integercompression.IntWrapper; + +/** + * Interface describing a standard CODEC to compress longs. 
This is a + * variation on the LongCODEC interface meant to be used for random access + * (i.e., given a large array, you can segment it and decode just the subarray you need). + * + * The main difference is that we must specify the number of longs we wish to + * decode. This information should be stored elsewhere. + * + * This interface was designed by the Terrier team for their search engine. + * + * @author Benoit Lacelle + * + */ +public interface SkippableLongCODEC { + /** + * Compress data from an array to another array. + * + * Both inpos and outpos are modified to represent how much data was read + * and written to. If 12 longs (inlength = 12) are compressed to 3 longs, then + * inpos will be incremented by 12 while outpos will be incremented by 3. We + * use IntWrapper to pass the values by reference. + * + * @param in + * input array + * @param inpos + * where to start reading in the array + * @param inlength + * how many longs to compress + * @param out + * output array + * @param outpos + * where to write in the output array + */ + public void headlessCompress(long[] in, IntWrapper inpos, int inlength, long[] out, + IntWrapper outpos); + + /** + * Uncompress data from an array to another array. + * + * Both inpos and outpos parameters are modified to indicate new positions + * after read/write. 
+ * + * @param in + * array containing data in compressed form + * @param inpos + * where to start reading in the array + * @param inlength + * length of the compressed data (ignored by some schemes) + * @param out + * array where to write the uncompressed output + * @param outpos + * where to start writing the uncompressed output in out + * @param num + * number of longs we want to decode, the actual number of longs decoded can be less + */ + public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, long[] out, + IntWrapper outpos, int num); + + /** + * Compute the maximum number of longs that might be required to store + * the compressed form of a given input array segment, without headers. + *

+ * This is useful to pre-allocate the output buffer before calling + * {@link #headlessCompress(long[], IntWrapper, int, long[], IntWrapper)}. + *

+ * + * @param compressedPositions + * since not all schemes compress every input integer, this parameter + * returns how many input integers will actually be compressed. + * This is useful when composing multiple schemes. + * @param inlength + * number of longs to be compressed + * @return the maximum number of longs needed in the output array + */ + int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength); +} diff --git a/src/main/java/me/lemire/longcompression/SkippableLongComposition.java b/src/main/java/me/lemire/longcompression/SkippableLongComposition.java new file mode 100644 index 0000000..eb03b72 --- /dev/null +++ b/src/main/java/me/lemire/longcompression/SkippableLongComposition.java @@ -0,0 +1,82 @@ +/** + * This is code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ +package me.lemire.longcompression; + +import me.lemire.integercompression.IntWrapper; + +/** + * Helper class to compose schemes. + * + * @author Benoit Lacelle + */ +public class SkippableLongComposition implements SkippableLongCODEC { + SkippableLongCODEC F1, F2; + + /** + * Compose a scheme from a first one (f1) and a second one (f2). The first + * one is called first and then the second one tries to compress whatever + * remains from the first run. + * + * By convention, the first scheme should be such that if, during decoding, + * a 32-bit zero is first encountered, then there is no output. 
+ * + * @param f1 + * first codec + * @param f2 + * second codec + */ + public SkippableLongComposition(SkippableLongCODEC f1, + SkippableLongCODEC f2) { + F1 = f1; + F2 = f2; + } + + @Override + public void headlessCompress(long[] in, IntWrapper inpos, int inlength, long[] out, + IntWrapper outpos) { + int init = inpos.get(); + int outposInit = outpos.get(); + F1.headlessCompress(in, inpos, inlength, out, outpos); + if (outpos.get() == outposInit) { + out[outposInit] = 0; + outpos.increment(); + } + inlength -= inpos.get() - init; + F2.headlessCompress(in, inpos, inlength, out, outpos); + } + + @Override + public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, long[] out, + IntWrapper outpos, int num) { + int init = inpos.get(); + int outposInit = outpos.get(); + + F1.headlessUncompress(in, inpos, inlength, out, outpos, num); + if (inpos.get() == init) { + inpos.increment(); + } + inlength -= inpos.get() - init; + num -= outpos.get() - outposInit; + F2.headlessUncompress(in, inpos, inlength, out, outpos, num); + } + + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + int init = compressedPositions.get(); + int maxLength = F1.maxHeadlessCompressedLength(compressedPositions, inlength); + maxLength += 1; // Add +1 for the potential F2 header. Question: is this header actually needed in the headless version? 
+ inlength -= compressedPositions.get() - init; + maxLength += F2.maxHeadlessCompressedLength(compressedPositions, inlength); + return maxLength; + } + + @Override + public String toString() { + return F1.toString() + "+" + F2.toString(); + } + +} diff --git a/src/main/java/me/lemire/longcompression/differential/LongDelta.java b/src/main/java/me/lemire/longcompression/differential/LongDelta.java new file mode 100644 index 0000000..8399f94 --- /dev/null +++ b/src/main/java/me/lemire/longcompression/differential/LongDelta.java @@ -0,0 +1,150 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression.differential; + +/** + * Generic class to compute differential coding. + * + * @author Benoit lacelle + * + */ +public final class LongDelta { + + /** + * Apply differential coding (in-place). + * + * @param data + * data to be modified + */ + public static void delta(long[] data) { + for (int i = data.length - 1; i > 0; --i) { + data[i] -= data[i - 1]; + } + } + + /** + * Apply differential coding (in-place) given an initial value. + * + * @param data + * data to be modified + * @param start + * starting index + * @param length + * number of integers to process + * @param init + * initial value + * @return next initial vale + */ + public static long delta(long[] data, int start, int length, int init) { + final long nextinit = data[start + length - 1]; + for (int i = length - 1; i > 0; --i) { + data[start + i] -= data[start + i - 1]; + } + data[start] -= init; + return nextinit; + } + + /** + * Compute differential coding given an initial value. Output is written + * to a provided array: must have length "length" or better. 
+ * + * @param data + * data to be modified + * @param start + * starting index + * @param length + * number of integers to process + * @param init + * initial value + * @param out + * output array + * @return next initial vale + */ + public static long delta(long[] data, int start, int length, int init, + long[] out) { + for (int i = length - 1; i > 0; --i) { + out[i] = data[start + i] - data[start + i - 1]; + } + out[0] = data[start] - init; + return data[start + length - 1]; + } + + /** + * Undo differential coding (in-place). Effectively computes a prefix + * sum. + * + * @param data + * to be modified. + */ + public static void inverseDelta(long[] data) { + for (int i = 1; i < data.length; ++i) { + data[i] += data[i - 1]; + } + } + + /** + * Undo differential coding (in-place). Effectively computes a prefix + * sum. Like inverseDelta, only faster. + * + * @param data + * to be modified + */ + public static void fastinverseDelta(long[] data) { + int sz0 = data.length / 4 * 4; + int i = 1; + if (sz0 >= 4) { + long a = data[0]; + for (; i < sz0 - 4; i += 4) { + a = data[i] += a; + a = data[i + 1] += a; + a = data[i + 2] += a; + a = data[i + 3] += a; + } + } + + for (; i < data.length; ++i) { + data[i] += data[i - 1]; + } + } + + /** + * Undo differential coding (in-place). Effectively computes a prefix + * sum. Like inverseDelta, only faster. Uses an initial value. 
+ * + * @param data + * to be modified + * @param start + * starting index + * @param length + * number of integers to process + * @param init + * initial value + * @return next initial value + */ + public static long fastinverseDelta(long[] data, int start, int length, + int init) { + data[start] += init; + int sz0 = length / 4 * 4; + int i = 1; + if (sz0 >= 4) { + long a = data[start]; + for (; i < sz0 - 4; i += 4) { + a = data[start + i] += a; + a = data[start + i + 1] += a; + a = data[start + i + 2] += a; + a = data[start + i + 3] += a; + } + } + + for (; i != length; ++i) { + data[start + i] += data[start + i - 1]; + } + return data[start + length - 1]; + } + +} diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java new file mode 100644 index 0000000..f134601 --- /dev/null +++ b/src/main/java/module-info.java @@ -0,0 +1,12 @@ +// Copyright (C) 2022 Intel Corporation + +// SPDX-License-Identifier: Apache-2.0 +module me.lemire.integercompression { + // This is currently only for advanced users: + // requires jdk.incubator.vector; + exports me.lemire.integercompression; + exports me.lemire.longcompression; + exports me.lemire.longcompression.differential; + exports me.lemire.integercompression.differential; + // exports me.lemire.integercompression.vector; +} diff --git a/src/test/java/me/lemire/integercompression/AdhocTest.java b/src/test/java/me/lemire/integercompression/AdhocTest.java index bced6c0..ee911b3 100644 --- a/src/test/java/me/lemire/integercompression/AdhocTest.java +++ b/src/test/java/me/lemire/integercompression/AdhocTest.java @@ -1,3 +1,10 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + package me.lemire.integercompression; import org.junit.Assert; @@ -15,13 +22,48 @@ @SuppressWarnings({ "static-method" }) public class AdhocTest { - - /** - * - */ + @Test + public void testIssue59() { + FastPFOR128 fastpfor = new FastPFOR128(); + + int N = 9984; + int[] data = new int[N]; + for (var i = 0; i < N; i += 150) { + data[i] = i; + } + + int[] compressedoutput1 = new int[N + 1024]; + + IntWrapper inputoffset1 = new IntWrapper(0); + IntWrapper outputoffset1 = new IntWrapper(0); + + fastpfor.compress(data, inputoffset1, N, compressedoutput1, outputoffset1); + int compressedsize1 = outputoffset1.get(); + + int[] recovered1 = new int[N]; + inputoffset1 = new IntWrapper(0); + outputoffset1 = new IntWrapper(0); + fastpfor.uncompress(compressedoutput1, outputoffset1, compressedsize1, recovered1, inputoffset1); + Assert.assertArrayEquals(data, recovered1); + + int[] compressedoutput2 = new int[N + 1024]; + + IntWrapper inputoffset2 = new IntWrapper(0); + IntWrapper outputoffset2 = new IntWrapper(0); + + fastpfor.compress(data, inputoffset2, N, compressedoutput2, outputoffset2); + int compressedsize2 = outputoffset2.get(); + + int[] recovered2 = new int[N]; + inputoffset2 = new IntWrapper(0); + outputoffset2 = new IntWrapper(0); + fastpfor.uncompress(compressedoutput2, outputoffset2, compressedsize2, recovered2, inputoffset2); + Assert.assertArrayEquals(data, recovered2); + } + @Test public void testIssue29() { - for(int x = 0; x < 64; x++) { + for(int x = 0; x < 64; x++) { int[] a = {2, 3, 4, 5}; int[] b = new int[90]; int[] c = new int[a.length]; @@ -35,7 +77,7 @@ public void testIssue29() { IntWrapper cOffset = new IntWrapper(0); codec.uncompress(b, bOffset, len, c, cOffset); Assert.assertArrayEquals(a,c); - } + } } /** @@ -43,20 +85,20 @@ public void testIssue29() { */ @Test public void testIssue29b() { - for(int x = 0; x < 64; x++) { - int[] a = {2, 3, 4, 5}; - int[] b = new int[90]; - int[] c = new 
int[a.length]; - SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte()); - IntWrapper aOffset = new IntWrapper(0); - IntWrapper bOffset = new IntWrapper(x); - codec.headlessCompress(a, aOffset, a.length, b, bOffset); - int len = bOffset.get() - x; - bOffset.set(x); - IntWrapper cOffset = new IntWrapper(0); - codec.headlessUncompress(b, bOffset, len, c, cOffset, a.length); - Assert.assertArrayEquals(a,c); - } + for(int x = 0; x < 64; x++) { + SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte()); + int[] a = {2, 3, 4, 5}; + int[] b = new int[x + codec.maxHeadlessCompressedLength(new IntWrapper(0), a.length)]; + int[] c = new int[a.length]; + IntWrapper aOffset = new IntWrapper(0); + IntWrapper bOffset = new IntWrapper(x); + codec.headlessCompress(a, aOffset, a.length, b, bOffset); + int len = bOffset.get() - x; + bOffset.set(x); + IntWrapper cOffset = new IntWrapper(0); + codec.headlessUncompress(b, bOffset, len, c, cOffset, a.length); + Assert.assertArrayEquals(a,c); + } } @@ -64,30 +106,27 @@ public void testIssue29b() { * */ @Test - public void testIssue41() { - for (int x = 0; x < 64; x++) { - int[] a = { 2, 3, 4, 5 }; - int[] b = new int[90]; - int[] c = new int[a.length]; - SkippableIntegratedIntegerCODEC codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(), - new IntegratedVariableByte()); - IntWrapper aOffset = new IntWrapper(0); - IntWrapper bOffset = new IntWrapper(x); - IntWrapper initValue = new IntWrapper(0); - - codec.headlessCompress(a, aOffset, a.length, b, bOffset, initValue); - int len = bOffset.get() - x; - bOffset.set(x); - IntWrapper cOffset = new IntWrapper(0); - initValue = new IntWrapper(0); - codec.headlessUncompress(b, bOffset, len, c, cOffset, a.length, initValue); - Assert.assertArrayEquals(a, c); - } - } + public void testIssue41() { + for (int x = 0; x < 64; x++) { + SkippableIntegratedIntegerCODEC codec = new 
SkippableIntegratedComposition(new IntegratedBinaryPacking(), + new IntegratedVariableByte()); + int[] a = { 2, 3, 4, 5 }; + int[] b = new int[x + codec.maxHeadlessCompressedLength(new IntWrapper(0), a.length)]; + int[] c = new int[a.length]; + IntWrapper aOffset = new IntWrapper(0); + IntWrapper bOffset = new IntWrapper(x); + IntWrapper initValue = new IntWrapper(0); + + codec.headlessCompress(a, aOffset, a.length, b, bOffset, initValue); + int len = bOffset.get() - x; + bOffset.set(x); + IntWrapper cOffset = new IntWrapper(0); + initValue = new IntWrapper(0); + codec.headlessUncompress(b, bOffset, len, c, cOffset, a.length, initValue); + Assert.assertArrayEquals(a, c); + } + } - /** - * a test - */ @Test public void biggerCompressedArray0() { // No problem: for comparison. @@ -95,12 +134,8 @@ public void biggerCompressedArray0() { assertSymmetry(c, 0, 16384); c = new Composition(new FastPFOR(), new VariableByte()); assertSymmetry(c, 0, 16384); - } - /** - * a test - */ @Test public void biggerCompressedArray1() { // Compressed array is bigger than original, because of VariableByte. @@ -108,9 +143,6 @@ public void biggerCompressedArray1() { assertSymmetry(c, -1); } - /** - * a test - */ @Test public void biggerCompressedArray2() { // Compressed array is bigger than original, because of Composition. diff --git a/src/test/java/me/lemire/integercompression/BasicTest.java b/src/test/java/me/lemire/integercompression/BasicTest.java index e88293e..b29ae0d 100644 --- a/src/test/java/me/lemire/integercompression/BasicTest.java +++ b/src/test/java/me/lemire/integercompression/BasicTest.java @@ -1,3 +1,10 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + package me.lemire.integercompression; import java.util.Arrays; @@ -22,7 +29,7 @@ */ @SuppressWarnings({ "static-method" }) public class BasicTest { - IntegerCODEC[] codecs = { + final IntegerCODEC[] codecs = { new IntegratedComposition(new IntegratedBinaryPacking(), new IntegratedVariableByte()), new JustCopy(), @@ -41,35 +48,35 @@ public class BasicTest { new GroupSimple9(), new Composition(new XorBinaryPacking(), new VariableByte()), new Composition(new DeltaZigzagBinaryPacking(), - new DeltaZigzagVariableByte()) }; + new DeltaZigzagVariableByte()) }; - /** - * + /** + * This tests with a compressed array with various offset */ - @Test - public void saulTest() { - for (IntegerCODEC C : codecs) { - for (int x = 0; x < 50; ++x) { - int[] a = { 2, 3, 4, 5 }; - int[] b = new int[90]; - int[] c = new int[a.length]; - - IntWrapper aOffset = new IntWrapper(0); - IntWrapper bOffset = new IntWrapper(x); - C.compress(a, aOffset, a.length, b, bOffset); - int len = bOffset.get() - x; - - bOffset.set(x); - IntWrapper cOffset = new IntWrapper(0); - C.uncompress(b, bOffset, len, c, cOffset); - if(!Arrays.equals(a, c)) { - System.out.println("Problem with "+C); - } - assertArrayEquals(a, c); - - } - } - } + @Test + public void saulTest() { + for (IntegerCODEC C : codecs) { + for (int x = 0; x < 50; ++x) { + int[] a = { 2, 3, 4, 5 }; + int[] b = new int[90]; + int[] c = new int[a.length]; + + IntWrapper aOffset = new IntWrapper(0); + IntWrapper bOffset = new IntWrapper(x); + C.compress(a, aOffset, a.length, b, bOffset); + int len = bOffset.get() - x; + + bOffset.set(x); + IntWrapper cOffset = new IntWrapper(0); + C.uncompress(b, bOffset, len, c, cOffset); + if(!Arrays.equals(a, c)) { + System.out.println("Problem with "+C); + } + assertArrayEquals(a, c); + + } + } + } /** * */ diff --git a/src/test/java/me/lemire/integercompression/BoundaryTest.java b/src/test/java/me/lemire/integercompression/BoundaryTest.java index 
ede2e9f..128b431 100644 --- a/src/test/java/me/lemire/integercompression/BoundaryTest.java +++ b/src/test/java/me/lemire/integercompression/BoundaryTest.java @@ -1,3 +1,10 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + package me.lemire.integercompression; import java.util.Arrays; diff --git a/src/test/java/me/lemire/integercompression/ByteBasicTest.java b/src/test/java/me/lemire/integercompression/ByteBasicTest.java index c2f5b6f..2b2d4f1 100644 --- a/src/test/java/me/lemire/integercompression/ByteBasicTest.java +++ b/src/test/java/me/lemire/integercompression/ByteBasicTest.java @@ -1,3 +1,10 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + package me.lemire.integercompression; import java.util.Arrays; @@ -21,32 +28,32 @@ public class ByteBasicTest { new IntegratedVariableByte(), }; - /** + /** * */ - @Test - public void saulTest() { - for (ByteIntegerCODEC C : codecs) { - for (int x = 0; x < 50 * 4; ++x) { - int[] a = { 2, 3, 4, 5 }; - byte[] b = new byte[90*4]; - int[] c = new int[a.length]; + @Test + public void saulTest() { + for (ByteIntegerCODEC C : codecs) { + for (int x = 0; x < 50 * 4; ++x) { + int[] a = { 2, 3, 4, 5 }; + byte[] b = new byte[90*4]; + int[] c = new int[a.length]; - IntWrapper aOffset = new IntWrapper(0); - IntWrapper bOffset = new IntWrapper(x); - C.compress(a, aOffset, a.length, b, bOffset); - int len = bOffset.get() - x; + IntWrapper aOffset = new IntWrapper(0); + IntWrapper bOffset = new IntWrapper(x); + C.compress(a, aOffset, a.length, b, bOffset); + int len = bOffset.get() - x; - bOffset.set(x); - IntWrapper cOffset = new IntWrapper(0); - C.uncompress(b, bOffset, len, c, cOffset); - if(!Arrays.equals(a, c)) { - System.out.println("Problem with "+C); - } - assertArrayEquals(a, c); - } - } - } + bOffset.set(x); 
+ IntWrapper cOffset = new IntWrapper(0); + C.uncompress(b, bOffset, len, c, cOffset); + if(!Arrays.equals(a, c)) { + System.out.println("Problem with "+C); + } + assertArrayEquals(a, c); + } + } + } /** * */ diff --git a/src/test/java/me/lemire/integercompression/DeltaZigzagEncodingTest.java b/src/test/java/me/lemire/integercompression/DeltaZigzagEncodingTest.java index 5e0923d..ae42c1d 100644 --- a/src/test/java/me/lemire/integercompression/DeltaZigzagEncodingTest.java +++ b/src/test/java/me/lemire/integercompression/DeltaZigzagEncodingTest.java @@ -1,7 +1,10 @@ -/* +/** * This code is released under the * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ */ + package me.lemire.integercompression; import org.junit.Test; diff --git a/src/test/java/me/lemire/integercompression/ExampleTest.java b/src/test/java/me/lemire/integercompression/ExampleTest.java index 300983c..c63c69b 100644 --- a/src/test/java/me/lemire/integercompression/ExampleTest.java +++ b/src/test/java/me/lemire/integercompression/ExampleTest.java @@ -1,3 +1,10 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + package me.lemire.integercompression; import me.lemire.integercompression.differential.*; @@ -10,305 +17,303 @@ * */ public class ExampleTest { - /** - * - */ - @Test - - public void superSimpleExample() { - IntegratedIntCompressor iic = new IntegratedIntCompressor(); - int[] data = new int[2342351]; - for (int k = 0; k < data.length; ++k) - data[k] = k; - System.out.println("Compressing " + data.length + " integers using friendly interface"); - int[] compressed = iic.compress(data); - int[] recov = iic.uncompress(compressed); - System.out - .println("compressed from " + data.length * 4 / 1024 + "KB to " + compressed.length * 4 / 1024 + "KB"); - if (!Arrays.equals(recov, data)) - throw new RuntimeException("bug"); - } - - /** - * - */ - @Test - - public void basicExample() { - int[] data = new int[2342351]; - System.out.println("Compressing " + data.length + " integers in one go"); - // data should be sorted for best - // results - for (int k = 0; k < data.length; ++k) - data[k] = k; - // Very important: the data is in sorted order!!! If not, you - // will get very poor compression with IntegratedBinaryPacking, - // you should use another CODEC. - - // next we compose a CODEC. Most of the processing - // will be done with binary packing, and leftovers will - // be processed using variable byte - IntegratedIntegerCODEC codec = new IntegratedComposition(new IntegratedBinaryPacking(), - new IntegratedVariableByte()); - // output vector should be large enough... - int[] compressed = new int[data.length + 1024]; - // compressed might not be large enough in some cases - // if you get java.lang.ArrayIndexOutOfBoundsException, try - // allocating more memory - - /** - * - * compressing - * - */ - IntWrapper inputoffset = new IntWrapper(0); - IntWrapper outputoffset = new IntWrapper(0); - codec.compress(data, inputoffset, data.length, compressed, outputoffset); - // got it! 
- // inputoffset should be at data.length but outputoffset tells - // us where we are... - System.out.println( - "compressed from " + data.length * 4 / 1024 + "KB to " + outputoffset.intValue() * 4 / 1024 + "KB"); - // we can repack the data: (optional) - compressed = Arrays.copyOf(compressed, outputoffset.intValue()); - - /** - * - * now uncompressing - * - * This assumes that we otherwise know how many integers have been - * compressed. See basicExampleHeadless for a more general case. - */ - int[] recovered = new int[data.length]; - IntWrapper recoffset = new IntWrapper(0); - codec.uncompress(compressed, new IntWrapper(0), compressed.length, recovered, recoffset); - if (Arrays.equals(data, recovered)) - System.out.println("data is recovered without loss"); - else - throw new RuntimeException("bug"); // could use assert - System.out.println(); - } - - /** - * Like the basicExample, but we store the input array size manually. - */ - @Test - public void basicExampleHeadless() { - int[] data = new int[2342351]; - System.out.println("Compressing " + data.length + " integers in one go using the headless approach"); - // data should be sorted for best - // results - for (int k = 0; k < data.length; ++k) - data[k] = k; - // Very important: the data is in sorted order!!! If not, you - // will get very poor compression with IntegratedBinaryPacking, - // you should use another CODEC. - - // next we compose a CODEC. Most of the processing - // will be done with binary packing, and leftovers will - // be processed using variable byte - SkippableIntegratedComposition codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(), - new IntegratedVariableByte()); - // output vector should be large enough... 
- int[] compressed = new int[data.length + 1024]; - // compressed might not be large enough in some cases - // if you get java.lang.ArrayIndexOutOfBoundsException, try - // allocating more memory - - /** - * - * compressing - * - */ - IntWrapper inputoffset = new IntWrapper(0); - IntWrapper outputoffset = new IntWrapper(1); - compressed[0] = data.length; // we manually store how many integers we - codec.headlessCompress(data, inputoffset, data.length, compressed, outputoffset, new IntWrapper(0)); - // got it! - // inputoffset should be at data.length but outputoffset tells - // us where we are... - System.out.println( - "compressed from " + data.length * 4 / 1024 + "KB to " + outputoffset.intValue() * 4 / 1024 + "KB"); - // we can repack the data: (optional) - compressed = Arrays.copyOf(compressed, outputoffset.intValue()); - - /** - * - * now uncompressing - * - */ - int howmany = compressed[0];// we manually stored the number of - // compressed integers - int[] recovered = new int[howmany]; - IntWrapper recoffset = new IntWrapper(0); - codec.headlessUncompress(compressed, new IntWrapper(1), compressed.length, recovered, recoffset, howmany, new IntWrapper(0)); - if (Arrays.equals(data, recovered)) - System.out.println("data is recovered without loss"); - else - throw new RuntimeException("bug"); // could use assert - System.out.println(); - } - - /** - * This is an example to show you can compress unsorted integers as long as - * most are small. 
- */ - @Test - public void unsortedExample() { - final int N = 1333333; - int[] data = new int[N]; - // initialize the data (most will be small - for (int k = 0; k < N; k += 1) - data[k] = 3; - // throw some larger values - for (int k = 0; k < N; k += 5) - data[k] = 100; - for (int k = 0; k < N; k += 533) - data[k] = 10000; - int[] compressed = new int[N + 1024];// could need more - IntegerCODEC codec = new Composition(new FastPFOR(), new VariableByte()); - // compressing - IntWrapper inputoffset = new IntWrapper(0); - IntWrapper outputoffset = new IntWrapper(0); - codec.compress(data, inputoffset, data.length, compressed, outputoffset); - System.out.println("compressed unsorted integers from " + data.length * 4 / 1024 + "KB to " - + outputoffset.intValue() * 4 / 1024 + "KB"); - // we can repack the data: (optional) - compressed = Arrays.copyOf(compressed, outputoffset.intValue()); - - int[] recovered = new int[N]; - IntWrapper recoffset = new IntWrapper(0); - codec.uncompress(compressed, new IntWrapper(0), compressed.length, recovered, recoffset); - if (Arrays.equals(data, recovered)) - System.out.println("data is recovered without loss"); - else - throw new RuntimeException("bug"); // could use assert - System.out.println(); - - } - - /** - * This is like the basic example, but we show how to process larger arrays - * in chunks. - * - * Some of this code was written by Pavel Klinov. - */ - @Test - public void advancedExample() { - int TotalSize = 2342351; // some arbitrary number - int ChunkSize = 16384; // size of each chunk, choose a multiple of 128 - System.out.println("Compressing " + TotalSize + " integers using chunks of " + ChunkSize + " integers (" - + ChunkSize * 4 / 1024 + "KB)"); - System.out.println("(It is often better for applications to work in chunks fitting in CPU cache.)"); - int[] data = new int[TotalSize]; - // data should be sorted for best - // results - for (int k = 0; k < data.length; ++k) - data[k] = k; - // next we compose a CODEC. 
Most of the processing - // will be done with binary packing, and leftovers will - // be processed using variable byte, using variable byte - // only for the last chunk! - IntegratedIntegerCODEC regularcodec = new IntegratedBinaryPacking(); - IntegratedVariableByte ivb = new IntegratedVariableByte(); - IntegratedIntegerCODEC lastcodec = new IntegratedComposition(regularcodec, ivb); - // output vector should be large enough... - int[] compressed = new int[TotalSize + 1024]; - - /** - * - * compressing - * - */ - IntWrapper inputoffset = new IntWrapper(0); - IntWrapper outputoffset = new IntWrapper(0); - for (int k = 0; k < TotalSize / ChunkSize; ++k) - regularcodec.compress(data, inputoffset, ChunkSize, compressed, outputoffset); - lastcodec.compress(data, inputoffset, TotalSize % ChunkSize, compressed, outputoffset); - // got it! - // inputoffset should be at data.length but outputoffset tells - // us where we are... - System.out.println( - "compressed from " + data.length * 4 / 1024 + "KB to " + outputoffset.intValue() * 4 / 1024 + "KB"); - // we can repack the data: - compressed = Arrays.copyOf(compressed, outputoffset.intValue()); - - /** - * - * now uncompressing - * - * We are *not* assuming that the original array length is known, - * however we assume that the chunk size (ChunkSize) is known. 
- * - */ - int[] recovered = new int[ChunkSize]; - IntWrapper compoff = new IntWrapper(0); - IntWrapper recoffset; - int currentpos = 0; - - while (compoff.get() < compressed.length) { - recoffset = new IntWrapper(0); - regularcodec.uncompress(compressed, compoff, compressed.length - compoff.get(), recovered, recoffset); - - if (recoffset.get() < ChunkSize) {// last chunk detected - ivb.uncompress(compressed, compoff, compressed.length - compoff.get(), recovered, recoffset); - } - for (int i = 0; i < recoffset.get(); ++i) { - if (data[currentpos + i] != recovered[i]) - throw new RuntimeException("bug"); // could use assert - } - currentpos += recoffset.get(); - } - System.out.println("data is recovered without loss"); - System.out.println(); - - } - - /** - * Demo of the headless approach where we must supply the array length - */ - @Test - public void headlessDemo() { - System.out.println("Compressing arrays with minimal header..."); - int[] uncompressed1 = { 1, 2, 1, 3, 1 }; - int[] uncompressed2 = { 3, 2, 4, 6, 1 }; - - int[] compressed = new int[uncompressed1.length + uncompressed2.length + 1024]; - - SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte()); - - // compressing - IntWrapper outPos = new IntWrapper(); - - IntWrapper previous = new IntWrapper(); - - codec.headlessCompress(uncompressed1, new IntWrapper(), uncompressed1.length, compressed, outPos); - int length1 = outPos.get() - previous.get(); - previous = new IntWrapper(outPos.get()); - codec.headlessCompress(uncompressed2, new IntWrapper(), uncompressed2.length, compressed, outPos); - int length2 = outPos.get() - previous.get(); - - compressed = Arrays.copyOf(compressed, length1 + length2); - System.out - .println("compressed unsorted integers from " + uncompressed1.length * 4 + "B to " + length1 * 4 + "B"); - System.out - .println("compressed unsorted integers from " + uncompressed2.length * 4 + "B to " + length2 * 4 + "B"); - System.out.println("Total 
compressed output " + compressed.length); - - int[] recovered1 = new int[uncompressed1.length]; - int[] recovered2 = new int[uncompressed1.length]; - IntWrapper inPos = new IntWrapper(); - System.out.println("Decoding first array starting at pos = " + inPos); - codec.headlessUncompress(compressed, inPos, compressed.length, recovered1, new IntWrapper(0), - uncompressed1.length); - System.out.println("Decoding second array starting at pos = " + inPos); - codec.headlessUncompress(compressed, inPos, compressed.length, recovered2, new IntWrapper(0), - uncompressed2.length); - if (!Arrays.equals(uncompressed1, recovered1)) - throw new RuntimeException("First array does not match."); - if (!Arrays.equals(uncompressed2, recovered2)) - throw new RuntimeException("Second array does not match."); - System.out.println("The arrays match, your code is probably ok."); - - } + /** + * + */ + @Test + + public void superSimpleExample() { + IntegratedIntCompressor iic = new IntegratedIntCompressor(); + int[] data = new int[2342351]; + for (int k = 0; k < data.length; ++k) + data[k] = k; + System.out.println("Compressing " + data.length + " integers using friendly interface"); + int[] compressed = iic.compress(data); + int[] recov = iic.uncompress(compressed); + System.out + .println("compressed from " + data.length * 4 / 1024 + "KB to " + compressed.length * 4 / 1024 + "KB"); + if (!Arrays.equals(recov, data)) + throw new RuntimeException("bug"); + } + + /** + * + */ + @Test + + public void basicExample() { + int[] data = new int[2342351]; + System.out.println("Compressing " + data.length + " integers in one go"); + // data should be sorted for best + // results + for (int k = 0; k < data.length; ++k) + data[k] = k; + // Very important: the data is in sorted order!!! If not, you + // will get very poor compression with IntegratedBinaryPacking, + // you should use another CODEC. + + // next we compose a CODEC. 
Most of the processing + // will be done with binary packing, and leftovers will + // be processed using variable byte + IntegratedIntegerCODEC codec = new IntegratedComposition(new IntegratedBinaryPacking(), + new IntegratedVariableByte()); + // output vector should be large enough... + int[] compressed = new int[data.length + 1024]; + // compressed might not be large enough in some cases + // if you get java.lang.ArrayIndexOutOfBoundsException, try + // allocating more memory + + /** + * + * compressing + * + */ + IntWrapper inputoffset = new IntWrapper(0); + IntWrapper outputoffset = new IntWrapper(0); + codec.compress(data, inputoffset, data.length, compressed, outputoffset); + // got it! + // inputoffset should be at data.length but outputoffset tells + // us where we are... + System.out.println( + "compressed from " + data.length * 4 / 1024 + "KB to " + outputoffset.intValue() * 4 / 1024 + "KB"); + // we can repack the data: (optional) + compressed = Arrays.copyOf(compressed, outputoffset.intValue()); + + /** + * + * now uncompressing + * + * This assumes that we otherwise know how many integers have been + * compressed. See basicExampleHeadless for a more general case. + */ + int[] recovered = new int[data.length]; + IntWrapper recoffset = new IntWrapper(0); + codec.uncompress(compressed, new IntWrapper(0), compressed.length, recovered, recoffset); + if (Arrays.equals(data, recovered)) + System.out.println("data is recovered without loss"); + else + throw new RuntimeException("bug"); // could use assert + System.out.println(); + } + + /** + * Like the basicExample, but we store the input array size manually. + */ + @Test + public void basicExampleHeadless() { + int[] data = new int[2342351]; + System.out.println("Compressing " + data.length + " integers in one go using the headless approach"); + // data should be sorted for best + // results + for (int k = 0; k < data.length; ++k) + data[k] = k; + // Very important: the data is in sorted order!!! 
If not, you + // will get very poor compression with IntegratedBinaryPacking, + // you should use another CODEC. + + // next we compose a CODEC. Most of the processing + // will be done with binary packing, and leftovers will + // be processed using variable byte + SkippableIntegratedComposition codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(), + new IntegratedVariableByte()); + int[] compressed = new int[codec.maxHeadlessCompressedLength(new IntWrapper(0), data.length)]; + + /** + * + * compressing + * + */ + IntWrapper inputoffset = new IntWrapper(0); + IntWrapper outputoffset = new IntWrapper(1); + compressed[0] = data.length; // we manually store how many integers we + codec.headlessCompress(data, inputoffset, data.length, compressed, outputoffset, new IntWrapper(0)); + // got it! + // inputoffset should be at data.length but outputoffset tells + // us where we are... + System.out.println( + "compressed from " + data.length * 4 / 1024 + "KB to " + outputoffset.intValue() * 4 / 1024 + "KB"); + // we can repack the data: (optional) + compressed = Arrays.copyOf(compressed, outputoffset.intValue()); + + /** + * + * now uncompressing + * + */ + int howmany = compressed[0];// we manually stored the number of + // compressed integers + int[] recovered = new int[howmany]; + IntWrapper recoffset = new IntWrapper(0); + codec.headlessUncompress(compressed, new IntWrapper(1), compressed.length, recovered, recoffset, howmany, new IntWrapper(0)); + if (Arrays.equals(data, recovered)) + System.out.println("data is recovered without loss"); + else + throw new RuntimeException("bug"); // could use assert + System.out.println(); + } + + /** + * This is an example to show you can compress unsorted integers as long as + * most are small. 
+ */ + @Test + public void unsortedExample() { + final int N = 1333333; + int[] data = new int[N]; + // initialize the data (most will be small + for (int k = 0; k < N; k += 1) + data[k] = 3; + // throw some larger values + for (int k = 0; k < N; k += 5) + data[k] = 100; + for (int k = 0; k < N; k += 533) + data[k] = 10000; + int[] compressed = new int[N + 1024];// could need more + IntegerCODEC codec = new Composition(new FastPFOR(), new VariableByte()); + // compressing + IntWrapper inputoffset = new IntWrapper(0); + IntWrapper outputoffset = new IntWrapper(0); + codec.compress(data, inputoffset, data.length, compressed, outputoffset); + System.out.println("compressed unsorted integers from " + data.length * 4 / 1024 + "KB to " + + outputoffset.intValue() * 4 / 1024 + "KB"); + // we can repack the data: (optional) + compressed = Arrays.copyOf(compressed, outputoffset.intValue()); + + int[] recovered = new int[N]; + IntWrapper recoffset = new IntWrapper(0); + codec.uncompress(compressed, new IntWrapper(0), compressed.length, recovered, recoffset); + if (Arrays.equals(data, recovered)) + System.out.println("data is recovered without loss"); + else + throw new RuntimeException("bug"); // could use assert + System.out.println(); + + } + + /** + * This is like the basic example, but we show how to process larger arrays + * in chunks. + * + * Some of this code was written by Pavel Klinov. + */ + @Test + public void advancedExample() { + int TotalSize = 2342351; // some arbitrary number + int ChunkSize = 16384; // size of each chunk, choose a multiple of 128 + System.out.println("Compressing " + TotalSize + " integers using chunks of " + ChunkSize + " integers (" + + ChunkSize * 4 / 1024 + "KB)"); + System.out.println("(It is often better for applications to work in chunks fitting in CPU cache.)"); + int[] data = new int[TotalSize]; + // data should be sorted for best + // results + for (int k = 0; k < data.length; ++k) + data[k] = k; + // next we compose a CODEC. 
Most of the processing + // will be done with binary packing, and leftovers will + // be processed using variable byte, using variable byte + // only for the last chunk! + IntegratedIntegerCODEC regularcodec = new IntegratedBinaryPacking(); + IntegratedVariableByte ivb = new IntegratedVariableByte(); + IntegratedIntegerCODEC lastcodec = new IntegratedComposition(regularcodec, ivb); + // output vector should be large enough... + int[] compressed = new int[TotalSize + 1024]; + + /** + * + * compressing + * + */ + IntWrapper inputoffset = new IntWrapper(0); + IntWrapper outputoffset = new IntWrapper(0); + for (int k = 0; k < TotalSize / ChunkSize; ++k) + regularcodec.compress(data, inputoffset, ChunkSize, compressed, outputoffset); + lastcodec.compress(data, inputoffset, TotalSize % ChunkSize, compressed, outputoffset); + // got it! + // inputoffset should be at data.length but outputoffset tells + // us where we are... + System.out.println( + "compressed from " + data.length * 4 / 1024 + "KB to " + outputoffset.intValue() * 4 / 1024 + "KB"); + // we can repack the data: + compressed = Arrays.copyOf(compressed, outputoffset.intValue()); + + /** + * + * now uncompressing + * + * We are *not* assuming that the original array length is known, + * however we assume that the chunk size (ChunkSize) is known. 
+ * + */ + int[] recovered = new int[ChunkSize]; + IntWrapper compoff = new IntWrapper(0); + IntWrapper recoffset; + int currentpos = 0; + + while (compoff.get() < compressed.length) { + recoffset = new IntWrapper(0); + regularcodec.uncompress(compressed, compoff, compressed.length - compoff.get(), recovered, recoffset); + + if (recoffset.get() < ChunkSize) {// last chunk detected + ivb.uncompress(compressed, compoff, compressed.length - compoff.get(), recovered, recoffset); + } + for (int i = 0; i < recoffset.get(); ++i) { + if (data[currentpos + i] != recovered[i]) + throw new RuntimeException("bug"); // could use assert + } + currentpos += recoffset.get(); + } + System.out.println("data is recovered without loss"); + System.out.println(); + + } + + /** + * Demo of the headless approach where we must supply the array length + */ + @Test + public void headlessDemo() { + System.out.println("Compressing arrays with minimal header..."); + int[] uncompressed1 = { 1, 2, 1, 3, 1 }; + int[] uncompressed2 = { 3, 2, 4, 6, 1 }; + + SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte()); + + int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed1.length) + + codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed2.length); + int[] compressed = new int[maxCompressedLength]; + + // compressing + IntWrapper outPos = new IntWrapper(); + + IntWrapper previous = new IntWrapper(); + + codec.headlessCompress(uncompressed1, new IntWrapper(), uncompressed1.length, compressed, outPos); + int length1 = outPos.get() - previous.get(); + previous = new IntWrapper(outPos.get()); + codec.headlessCompress(uncompressed2, new IntWrapper(), uncompressed2.length, compressed, outPos); + int length2 = outPos.get() - previous.get(); + + compressed = Arrays.copyOf(compressed, length1 + length2); + System.out + .println("compressed unsorted integers from " + uncompressed1.length * 4 + "B to " + length1 * 4 + "B"); 
+ System.out + .println("compressed unsorted integers from " + uncompressed2.length * 4 + "B to " + length2 * 4 + "B"); + System.out.println("Total compressed output " + compressed.length); + + int[] recovered1 = new int[uncompressed1.length]; + int[] recovered2 = new int[uncompressed1.length]; + IntWrapper inPos = new IntWrapper(); + System.out.println("Decoding first array starting at pos = " + inPos); + codec.headlessUncompress(compressed, inPos, compressed.length, recovered1, new IntWrapper(0), + uncompressed1.length); + System.out.println("Decoding second array starting at pos = " + inPos); + codec.headlessUncompress(compressed, inPos, compressed.length, recovered2, new IntWrapper(0), + uncompressed2.length); + if (!Arrays.equals(uncompressed1, recovered1)) + throw new RuntimeException("First array does not match."); + if (!Arrays.equals(uncompressed2, recovered2)) + throw new RuntimeException("Second array does not match."); + System.out.println("The arrays match, your code is probably ok."); + + } } diff --git a/src/test/java/me/lemire/integercompression/IntCompressorTest.java b/src/test/java/me/lemire/integercompression/IntCompressorTest.java index 34b8946..79e51fc 100644 --- a/src/test/java/me/lemire/integercompression/IntCompressorTest.java +++ b/src/test/java/me/lemire/integercompression/IntCompressorTest.java @@ -1,3 +1,10 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + package me.lemire.integercompression; import java.util.Arrays; diff --git a/src/test/java/me/lemire/integercompression/ResourcedTest.java b/src/test/java/me/lemire/integercompression/ResourcedTest.java index 61b8e58..8316129 100644 --- a/src/test/java/me/lemire/integercompression/ResourcedTest.java +++ b/src/test/java/me/lemire/integercompression/ResourcedTest.java @@ -1,3 +1,10 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + package me.lemire.integercompression; import java.util.ArrayList; @@ -17,65 +24,65 @@ * */ public class ResourcedTest { - SkippableIntegerCODEC[] codecs = { new JustCopy(), new VariableByte(), - new SkippableComposition(new BinaryPacking(), new VariableByte()), - new SkippableComposition(new NewPFD(), new VariableByte()), - new SkippableComposition(new NewPFDS9(), new VariableByte()), - new SkippableComposition(new NewPFDS16(), new VariableByte()), - new SkippableComposition(new OptPFD(), new VariableByte()), - new SkippableComposition(new OptPFDS9(), new VariableByte()), - new SkippableComposition(new OptPFDS16(), new VariableByte()), - new SkippableComposition(new FastPFOR128(), new VariableByte()), - new SkippableComposition(new FastPFOR(), new VariableByte()), new Simple9(), new Simple16() }; + SkippableIntegerCODEC[] codecs = { new JustCopy(), new VariableByte(), + new SkippableComposition(new BinaryPacking(), new VariableByte()), + new SkippableComposition(new NewPFD(), new VariableByte()), + new SkippableComposition(new NewPFDS9(), new VariableByte()), + new SkippableComposition(new NewPFDS16(), new VariableByte()), + new SkippableComposition(new OptPFD(), new VariableByte()), + new SkippableComposition(new OptPFDS9(), new VariableByte()), + new SkippableComposition(new OptPFDS16(), new VariableByte()), + new SkippableComposition(new FastPFOR128(), new VariableByte()), 
+ new SkippableComposition(new FastPFOR(), new VariableByte()), new Simple9(), new Simple16() }; - /** - * @throws IOException - * if the resource cannot be accessed (should be considered a - * bug) - * - */ - @Test - public void IntCompressorTest() throws IOException { - // next line requires Java8? - // int[] data = - // Files.lines(Paths.get("integers.txt")).mapToInt(Integer::parseInt).toArray(); - File f = new File("src/test/resources/integers.txt"); - System.out.println("loading test data from "+ f.getAbsolutePath()); - BufferedReader bfr = new BufferedReader(new FileReader(f)); - String line; - ArrayList ai = new ArrayList(); - while ((line = bfr.readLine()) != null) { - ai.add(Integer.parseInt(line)); - } - bfr.close(); - int[] data = new int[ai.size()]; - for (int k = 0; k < data.length; ++k) - data[k] = ai.get(k).intValue(); - ai = null; - // finally! - { - IntegratedIntCompressor iic = new IntegratedIntCompressor(); - int[] compressed = iic.compress(data); - int[] recovered = iic.uncompress(compressed); - Assert.assertArrayEquals(recovered, data); - } - for (SkippableIntegerCODEC C : codecs) { - IntCompressor iic = new IntCompressor(C); - int[] compressed = iic.compress(data); - int[] recovered = iic.uncompress(compressed); - Assert.assertArrayEquals(recovered, data); + /** + * @throws IOException + * if the resource cannot be accessed (should be considered a + * bug) + * + */ + @Test + public void IntCompressorTest() throws IOException { + // next line requires Java8? 
+ // int[] data = + // Files.lines(Paths.get("integers.txt")).mapToInt(Integer::parseInt).toArray(); + File f = new File("src/test/resources/integers.txt"); + System.out.println("loading test data from "+ f.getAbsolutePath()); + BufferedReader bfr = new BufferedReader(new FileReader(f)); + String line; + ArrayList ai = new ArrayList(); + while ((line = bfr.readLine()) != null) { + ai.add(Integer.parseInt(line)); + } + bfr.close(); + int[] data = new int[ai.size()]; + for (int k = 0; k < data.length; ++k) + data[k] = ai.get(k).intValue(); + ai = null; + // finally! + { + IntegratedIntCompressor iic = new IntegratedIntCompressor(); + int[] compressed = iic.compress(data); + int[] recovered = iic.uncompress(compressed); + Assert.assertArrayEquals(recovered, data); + } + for (SkippableIntegerCODEC C : codecs) { + IntCompressor iic = new IntCompressor(C); + int[] compressed = iic.compress(data); + int[] recovered = iic.uncompress(compressed); + Assert.assertArrayEquals(recovered, data); - } - for (SkippableIntegerCODEC C : codecs) { - if (C instanceof SkippableIntegratedIntegerCODEC) { - IntegratedIntCompressor iic = new IntegratedIntCompressor((SkippableIntegratedIntegerCODEC) C); - int[] compressed = iic.compress(data); - int[] recovered = iic.uncompress(compressed); - Assert.assertArrayEquals(recovered, data); - } + } + for (SkippableIntegerCODEC C : codecs) { + if (C instanceof SkippableIntegratedIntegerCODEC) { + IntegratedIntCompressor iic = new IntegratedIntCompressor((SkippableIntegratedIntegerCODEC) C); + int[] compressed = iic.compress(data); + int[] recovered = iic.uncompress(compressed); + Assert.assertArrayEquals(recovered, data); + } - } + } - } + } } diff --git a/src/test/java/me/lemire/integercompression/SkippableBasicTest.java b/src/test/java/me/lemire/integercompression/SkippableBasicTest.java index d965992..881dada 100644 --- a/src/test/java/me/lemire/integercompression/SkippableBasicTest.java +++ 
b/src/test/java/me/lemire/integercompression/SkippableBasicTest.java @@ -1,9 +1,22 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + package me.lemire.integercompression; import java.util.Arrays; +import me.lemire.integercompression.differential.IntegratedBinaryPacking; +import me.lemire.integercompression.differential.IntegratedVariableByte; +import me.lemire.integercompression.differential.SkippableIntegratedComposition; +import me.lemire.integercompression.differential.SkippableIntegratedIntegerCODEC; import org.junit.Test; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertTrue; /** * Just some basic sanity tests. @@ -12,7 +25,7 @@ */ @SuppressWarnings({ "static-method" }) public class SkippableBasicTest { - SkippableIntegerCODEC[] codecs = { + final SkippableIntegerCODEC[] codecs = { new JustCopy(), new VariableByte(), new SkippableComposition(new BinaryPacking(), new VariableByte()), @@ -41,10 +54,11 @@ public void consistentTest() { for (SkippableIntegerCODEC c : codecs) { System.out.println("[SkippeableBasicTest.consistentTest] codec = " + c); - int[] outBuf = new int[N + 1024]; for (int n = 0; n <= N; ++n) { IntWrapper inPos = new IntWrapper(); IntWrapper outPos = new IntWrapper(); + int[] outBuf = new int[c.maxHeadlessCompressedLength(new IntWrapper(0), n)]; + c.headlessCompress(data, inPos, n, outBuf, outPos); IntWrapper inPoso = new IntWrapper(); @@ -140,5 +154,135 @@ public void varyingLengthTest2() { } } + @Test + public void testMaxHeadlessCompressedLength() { + testMaxHeadlessCompressedLength(new IntegratedBinaryPacking(), 16 * IntegratedBinaryPacking.BLOCK_SIZE); + testMaxHeadlessCompressedLength(new IntegratedVariableByte(), 128); + testMaxHeadlessCompressedLength(new SkippableIntegratedComposition(new IntegratedBinaryPacking(), new IntegratedVariableByte()), 16 * IntegratedBinaryPacking.BLOCK_SIZE 
+ 10); + + testMaxHeadlessCompressedLength(new BinaryPacking(), 16 * BinaryPacking.BLOCK_SIZE, 32); + testMaxHeadlessCompressedLength(new VariableByte(), 128, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new BinaryPacking(), new VariableByte()), 16 * BinaryPacking.BLOCK_SIZE + 10, 32); + testMaxHeadlessCompressedLength(new JustCopy(), 128, 32); + testMaxHeadlessCompressedLength(new Simple9(), 128, 28); + testMaxHeadlessCompressedLength(new Simple16(), 128, 28); + testMaxHeadlessCompressedLength(new GroupSimple9(), 128, 28); + testMaxHeadlessCompressedLength(new OptPFD(), 4 * OptPFD.BLOCK_SIZE, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new OptPFD(), new VariableByte()), 4 * OptPFD.BLOCK_SIZE + 10, 32); + testMaxHeadlessCompressedLength(new OptPFDS9(), 4 * OptPFDS9.BLOCK_SIZE, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new OptPFDS9(), new VariableByte()), 4 * OptPFDS9.BLOCK_SIZE + 10, 32); + testMaxHeadlessCompressedLength(new OptPFDS16(), 4 * OptPFDS16.BLOCK_SIZE, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new OptPFDS9(), new VariableByte()), 4 * OptPFDS16.BLOCK_SIZE + 10, 32); + testMaxHeadlessCompressedLength(new NewPFD(), 4 * NewPFD.BLOCK_SIZE, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new NewPFD(), new VariableByte()), 4 * NewPFD.BLOCK_SIZE + 10, 32); + testMaxHeadlessCompressedLength(new NewPFDS9(), 4 * NewPFDS9.BLOCK_SIZE, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new NewPFDS9(), new VariableByte()), 4 * NewPFDS9.BLOCK_SIZE + 10, 32); + testMaxHeadlessCompressedLength(new NewPFDS16(), 4 * NewPFDS16.BLOCK_SIZE, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new NewPFDS16(), new VariableByte()), 4 * NewPFDS16.BLOCK_SIZE + 10, 32); + + int fastPfor128PageSize = FastPFOR128.BLOCK_SIZE * 4; // smaller page size than the default to speed up the test + testMaxHeadlessCompressedLength(new FastPFOR128(fastPfor128PageSize), 2 * 
fastPfor128PageSize, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new FastPFOR128(fastPfor128PageSize), new VariableByte()), 2 * fastPfor128PageSize + 10, 32); + int fastPforPageSize = FastPFOR.BLOCK_SIZE * 4; // smaller page size than the default to speed up the test + testMaxHeadlessCompressedLength(new FastPFOR(fastPforPageSize), 2 * fastPforPageSize, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new FastPFOR(fastPforPageSize), new VariableByte()), 2 * fastPforPageSize + 10, 32); + } + + private static void testMaxHeadlessCompressedLength(SkippableIntegratedIntegerCODEC codec, int inlengthTo) { + // We test the worst-case scenario by making all deltas and the initial value negative. + int delta = -1; + int value = delta; + + for (int inlength = 0; inlength < inlengthTo; ++inlength) { + int[] input = new int[inlength]; + for (int i = 0; i < inlength; i++) { + input[i] = value; + value += delta; + } + + int maxOutputLength = codec.maxHeadlessCompressedLength(new IntWrapper(), inlength); + int[] output = new int[maxOutputLength]; + IntWrapper outPos = new IntWrapper(); + + codec.headlessCompress(input, new IntWrapper(), inlength, output, outPos, new IntWrapper()); + // If we reach this point, no exception was thrown, which means the calculated output length was sufficient. + + assertTrue(maxOutputLength <= outPos.get() + 1); // +1 because SkippableIntegratedComposition always adds one extra integer for the potential header + } + } + + private static void testMaxHeadlessCompressedLength(SkippableIntegerCODEC codec, int inlengthTo, int maxBitWidth) { + // Some schemes ignore bit widths between 21 and 31. Therefore, in addition to maxBitWidth - 1, we also test 20. 
+ assertTrue(maxBitWidth >= 20); + int[] regularValueBitWidths = { 20, maxBitWidth - 1 }; + + for (int inlength = 0; inlength < inlengthTo; ++inlength) { + int[] input = new int[inlength]; + + int maxOutputLength = codec.maxHeadlessCompressedLength(new IntWrapper(), inlength); + int[] output = new int[maxOutputLength]; + + for (int exceptionCount = 0; exceptionCount < inlength; exceptionCount++) { + int exception = maxBitWidth == 32 ? -1 : (1 << maxBitWidth) - 1; + + for (int regularValueBitWidth : regularValueBitWidths) { + int regularValue = regularValueBitWidth == 32 ? -1 : (1 << regularValueBitWidth) - 1; + + Arrays.fill(input, 0, exceptionCount, exception); + Arrays.fill(input, exceptionCount, input.length, regularValue); + + codec.headlessCompress(input, new IntWrapper(), inlength, output, new IntWrapper()); + // If we reach this point, no exception was thrown, which means the calculated output length was sufficient. + } + } + } + } + + @Test + public void testUncompressOutputOffset_SkippableComposition() { + for (int offset : new int[] {0, 1, 6}) { + SkippableComposition codec = new SkippableComposition(new BinaryPacking(), new VariableByte()); + + int[] input = { 2, 3, 4, 5 }; + int[] compressed = new int[codec.maxHeadlessCompressedLength(new IntWrapper(0), input.length)]; + int[] uncompressed = new int[offset + input.length]; + + IntWrapper inputOffset = new IntWrapper(0); + IntWrapper compressedOffset = new IntWrapper(0); + + codec.headlessCompress(input, inputOffset, input.length, compressed, compressedOffset); + + int compressedLength = compressedOffset.get(); + IntWrapper uncompressedOffset = new IntWrapper(offset); + compressedOffset = new IntWrapper(0); + codec.headlessUncompress(compressed, compressedOffset, compressedLength, uncompressed, uncompressedOffset, input.length); + + assertArrayEquals(input, Arrays.copyOfRange(uncompressed, offset, offset + input.length)); + } + } + + @Test + public void 
testUncompressOutputOffset_SkippableIntegratedComposition() { + for (int offset : new int[] {0, 1, 6}) { + SkippableIntegratedComposition codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(), new IntegratedVariableByte()); + + int[] input = { 2, 3, 4, 5 }; + int[] compressed = new int[codec.maxHeadlessCompressedLength(new IntWrapper(0), input.length)]; + int[] uncompressed = new int[offset + input.length]; + IntWrapper inputOffset = new IntWrapper(0); + IntWrapper compressedOffset = new IntWrapper(0); + IntWrapper initValue = new IntWrapper(0); + + codec.headlessCompress(input, inputOffset, input.length, compressed, compressedOffset, initValue); + + int compressedLength = compressedOffset.get(); + IntWrapper uncompressedOffset = new IntWrapper(offset); + compressedOffset = new IntWrapper(0); + initValue = new IntWrapper(0); + codec.headlessUncompress(compressed, compressedOffset, compressedLength, uncompressed, uncompressedOffset, input.length, initValue); + + assertArrayEquals(input, Arrays.copyOfRange(uncompressed, offset, offset + input.length)); + } + } } diff --git a/src/test/java/me/lemire/integercompression/TestUtils.java b/src/test/java/me/lemire/integercompression/TestUtils.java index a0820ab..b3cbff3 100644 --- a/src/test/java/me/lemire/integercompression/TestUtils.java +++ b/src/test/java/me/lemire/integercompression/TestUtils.java @@ -1,3 +1,10 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + package me.lemire.integercompression; import java.util.Arrays; @@ -123,7 +130,7 @@ public static void assertSymmetry(IntegerCODEC codec, int... 
orig) { assertArrayEquals(orig, target); } - protected static int[] compress(IntegerCODEC codec, int[] data) { + public static int[] compress(IntegerCODEC codec, int[] data) { int[] outBuf = new int[data.length * 4]; IntWrapper inPos = new IntWrapper(); IntWrapper outPos = new IntWrapper(); @@ -158,7 +165,7 @@ protected static int[] uncompress(ByteIntegerCODEC codec, byte[] data, int len) } protected static int[] compressHeadless(SkippableIntegerCODEC codec, int[] data) { - int[] outBuf = new int[data.length * 4]; + int[] outBuf = new int[codec.maxHeadlessCompressedLength(new IntWrapper(0), data.length)]; IntWrapper inPos = new IntWrapper(); IntWrapper outPos = new IntWrapper(); codec.headlessCompress(data, inPos, data.length, outBuf, outPos); diff --git a/src/test/java/me/lemire/integercompression/XorBinaryPackingTest.java b/src/test/java/me/lemire/integercompression/XorBinaryPackingTest.java index 3201b02..650eb4b 100644 --- a/src/test/java/me/lemire/integercompression/XorBinaryPackingTest.java +++ b/src/test/java/me/lemire/integercompression/XorBinaryPackingTest.java @@ -1,7 +1,10 @@ /** * This code is released under the * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ */ + package me.lemire.integercompression; import java.util.Arrays; diff --git a/src/test/java/me/lemire/longcompression/ATestLongCODEC.java b/src/test/java/me/lemire/longcompression/ATestLongCODEC.java new file mode 100644 index 0000000..c61ea69 --- /dev/null +++ b/src/test/java/me/lemire/longcompression/ATestLongCODEC.java @@ -0,0 +1,96 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +import java.util.stream.LongStream; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Edge-cases to be tested on a per-codec basis + * + * @author Benoit Lacelle + */ +public abstract class ATestLongCODEC { + protected void checkConsistency(LongCODEC codec, long[] array) { + { + long[] compressed = LongTestUtils.compress(codec, array); + long[] uncompressed = LongTestUtils.uncompress(codec, compressed, array.length); + + Assert.assertArrayEquals(array, uncompressed); + } + + if (codec instanceof ByteLongCODEC) { + byte[] compressed = LongTestUtils.compress((ByteLongCODEC) codec, array); + long[] uncompressed = LongTestUtils.uncompress((ByteLongCODEC) codec, compressed, array.length); + + Assert.assertArrayEquals(array, uncompressed); + } + + if (codec instanceof SkippableLongCODEC) { + long[] compressed = LongTestUtils.compressHeadless((SkippableLongCODEC) codec, array); + long[] uncompressed = + LongTestUtils.uncompressHeadless((SkippableLongCODEC) codec, compressed, array.length); + + Assert.assertArrayEquals(array, uncompressed); + } + } + + public abstract LongCODEC getCodec(); + + @Test + public void testCodec_Minus1() { + checkConsistency(getCodec(), new long[] { -1 }); + } + + @Test + public void testCodec_ZeroTimes8Minus1() { + checkConsistency(getCodec(), new long[] { 0, 0, 0, 0, 0, 0, 0, 0, -1 }); + } + + @Test + public void testCodec_ZeroTimes127Minus1() { + long[] array = LongStream.concat(LongStream.range(0, 127).map(l -> 0), LongStream.of(-1)).toArray(); + + checkConsistency(getCodec(), array); + } + + @Test + public void testCodec_ZeroTimes128Minus1() { + long[] array = LongStream.concat(LongStream.range(0, 128).map(l -> 0), LongStream.of(-1)).toArray(); + + checkConsistency(getCodec(), array); + } + + @Test + public void testCodec_MinValue() { + checkConsistency(getCodec(), new long[] { Long.MIN_VALUE }); + } + + @Test + public void 
testCodec_ZeroMinValue() { + checkConsistency(getCodec(), new long[] { 0, Long.MIN_VALUE }); + } + + @Test + public void testCodec_allPowerOfTwo() { + checkConsistency(getCodec(), new long[] { 1L << 42 }); + for (int i = 0; i < 64; i++) { + checkConsistency(getCodec(), new long[] { 1L << i }); + } + } + + @Test + public void testCodec_ZeroThenAllPowerOfTwo() { + for (int i = 0; i < 64; i++) { + checkConsistency(getCodec(), new long[] { 0, 1L << i }); + } + } + +} diff --git a/src/test/java/me/lemire/longcompression/LongBasicTest.java b/src/test/java/me/lemire/longcompression/LongBasicTest.java new file mode 100644 index 0000000..8dc0c9b --- /dev/null +++ b/src/test/java/me/lemire/longcompression/LongBasicTest.java @@ -0,0 +1,391 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; + +import org.junit.Test; + +import me.lemire.integercompression.FastPFOR; +import me.lemire.integercompression.FastPFOR128; +import me.lemire.integercompression.IntWrapper; +import me.lemire.longcompression.differential.LongDelta; +import me.lemire.longcompression.synth.LongClusteredDataGenerator; + +/** + * Just some basic sanity tests. 
+ * + * @author Benoit Lacelle + */ +@SuppressWarnings({ "static-method" }) +public class LongBasicTest { + final LongCODEC[] codecs = { + new LongJustCopy(), + new LongVariableByte(), + new LongAs2IntsCodec(), + new LongComposition(new LongBinaryPacking(), new LongVariableByte()), + }; + + /** + * This tests with a compressed array with various offset + */ + @Test + public void saulTest() { + for (LongCODEC C : codecs) { + for (int x = 0; x < 50; ++x) { + long[] a = { 2, 3, 4, 5 }; + long[] b = new long[90]; + long[] c = new long[a.length]; + + IntWrapper aOffset = new IntWrapper(0); + IntWrapper bOffset = new IntWrapper(x); + C.compress(a, aOffset, a.length, b, bOffset); + int len = bOffset.get() - x; + + bOffset.set(x); + IntWrapper cOffset = new IntWrapper(0); + C.uncompress(b, bOffset, len, c, cOffset); + if(!Arrays.equals(a, c)) { + System.out.println("Problem with "+C); + } + assertArrayEquals(a, c); + + } + } + } + /** + * + */ + @Test + public void varyingLengthTest() { + int N = 4096; + long[] data = new long[N]; + for (int k = 0; k < N; ++k) + data[k] = k; + for (LongCODEC c : codecs) { + System.out.println("[BasicTest.varyingLengthTest] codec = " + c); + for (int L = 1; L <= 128; L++) { + long[] comp = LongTestUtils.compress(c, Arrays.copyOf(data, L)); + long[] answer = LongTestUtils.uncompress(c, comp, L); + for (int k = 0; k < L; ++k) + if (answer[k] != data[k]) { + long[] comp2 = LongTestUtils.compress(c, Arrays.copyOf(data, L)); + long[] answer2 = LongTestUtils.uncompress(c, comp2, L); + throw new RuntimeException("bug"); + } + } + for (int L = 128; L <= N; L *= 2) { + long[] comp = LongTestUtils.compress(c, Arrays.copyOf(data, L)); + long[] answer = LongTestUtils.uncompress(c, comp, L); + for (int k = 0; k < L; ++k) + if (answer[k] != data[k]) { + long[] comp2 = LongTestUtils.compress(c, Arrays.copyOf(data, L)); + long[] answer2 = LongTestUtils.uncompress(c, comp2, L); + System.out.println(Arrays.toString(Arrays.copyOf( + answer, L))); + 
System.out.println(Arrays.toString(Arrays.copyOf(data, + L))); + throw new RuntimeException("bug"); + } + } + + } + } + + /** + * + */ + @Test + public void varyingLengthTest2() { + int N = 128; + long[] data = new long[N]; + data[127] = -1; + for (LongCODEC c : codecs) { + System.out.println("[BasicTest.varyingLengthTest2] codec = " + c); + try { + // CODEC Simple9 is limited to "small" integers. + if (c.getClass().equals( + Class.forName("me.lemire.integercompression.Simple9"))) + continue; + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } + try { + // CODEC Simple16 is limited to "small" integers. + if (c.getClass().equals( + Class.forName("me.lemire.integercompression.Simple16"))) + continue; + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } + try { + // CODEC GroupSimple9 is limited to "small" integers. + if (c.getClass().equals( + Class.forName("me.lemire.integercompression.GroupSimple9"))) + continue; + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } + + for (int L = 1; L <= 128; L++) { + long[] comp = LongTestUtils.compress(c, Arrays.copyOf(data, L)); + long[] answer = LongTestUtils.uncompress(c, comp, L); + for (int k = 0; k < L; ++k) + if (answer[k] != data[k]) + throw new RuntimeException("bug"); + } + for (int L = 128; L <= N; L *= 2) { + long[] comp = LongTestUtils.compress(c, Arrays.copyOf(data, L)); + long[] answer = LongTestUtils.uncompress(c, comp, L); + for (int k = 0; k < L; ++k) + if (answer[k] != data[k]) + throw new RuntimeException("bug"); + } + + } + } + + /** + * + */ + @Test + public void checkVariousCases() { + for (LongCODEC c : codecs) { + testZeroInZeroOut(c); + test(c, c, 5, 10); + test(c, c, 5, 14); + test(c, c, 2, 18); + // TODO Unclear which codec should manage an empty output array or not + // Some IntegerCodec does not output anything if the input is smaller than some block size + // testSpurious(c); + testUnsorted(c); + testUnsorted2(c); + testUnsorted3(c); + } + } + + /** + * 
check that the codecs can be inverted. + */ + @Test + public void basictest() { + for (LongCODEC codec : codecs) { + test(codec, 5, 10); + test(codec, 5, 14); + test(codec, 2, 18); + } + } + + private static void testSpurious(LongCODEC c) { + long[] x = new long[1024]; + long[] y = new long[0]; + IntWrapper i0 = new IntWrapper(0); + IntWrapper i1 = new IntWrapper(0); + for (int inlength = 0; inlength < 32; ++inlength) { + c.compress(x, i0, inlength, y, i1); + assertEquals(0, i1.intValue()); + } + } + + private static void testZeroInZeroOut(LongCODEC c) { + long[] x = new long[0]; + long[] y = new long[0]; + IntWrapper i0 = new IntWrapper(0); + IntWrapper i1 = new IntWrapper(0); + c.compress(x, i0, 0, y, i1); + assertEquals(0, i1.intValue()); + + long[] out = new long[0]; + IntWrapper outpos = new IntWrapper(0); + c.uncompress(y, i1, 0, out, outpos); + assertEquals(0, outpos.intValue()); + } + + private static void test(LongCODEC c, LongCODEC co, int N, int nbr) { + LongClusteredDataGenerator cdg = new LongClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 31 - nbr; sparsity += 4) { + long[][] data = new long[N][]; + int max = (1 << (nbr + sparsity)); + for (int k = 0; k < N; ++k) { + data[k] = cdg.generateClustered((1 << nbr), max); + } + testCodec(c, co, data, max); + } + } + + private static void test(LongCODEC codec, int N, int nbr) { + LongClusteredDataGenerator cdg = new LongClusteredDataGenerator(); + System.out.println("[BasicTest.test] N = " + N + " " + nbr); + for (int sparsity = 1; sparsity < 63 - nbr; sparsity += 4) { + long[][] data = new long[N][]; + long max = (1L << (nbr + sparsity)); + for (int k = 0; k < N; ++k) { + data[k] = cdg.generateClustered((1 << nbr), max); + } + + testCodec(codec, codec, data, max); + } + } + + private static void testCodec(LongCODEC c, LongCODEC co, + long[][] data, long max) { + int N = data.length; + int maxlength = 0; + for (int k = 0; k < N; ++k) { + if (data[k].length > maxlength) + maxlength = 
data[k].length; + } + long[] buffer = new long[maxlength + 1024]; + long[] dataout = new long[4 * maxlength + 1024]; + // 4x + 1024 to account for the possibility of some negative + // compression. + IntWrapper inpos = new IntWrapper(); + IntWrapper outpos = new IntWrapper(); + for (int k = 0; k < N; ++k) { + long[] backupdata = Arrays.copyOf(data[k], data[k].length); + + inpos.set(1); + outpos.set(0); + if (!(c instanceof IntegratedLongCODEC)) { + LongDelta.delta(backupdata); + } + c.compress(backupdata, inpos, backupdata.length - inpos.get(), + dataout, outpos); + final int thiscompsize = outpos.get() + 1; + inpos.set(0); + outpos.set(1); + buffer[0] = backupdata[0]; + co.uncompress(dataout, inpos, thiscompsize - 1, buffer, outpos); + if (!(c instanceof IntegratedLongCODEC)) + LongDelta.fastinverseDelta(buffer); + + // Check assertions. + assertEquals("length is not match", outpos.get(), data[k].length); + long[] bufferCutout = Arrays.copyOf(buffer, outpos.get()); + assertArrayEquals("failed to reconstruct original data", data[k], + bufferCutout); + } + } + + /** + * @param codec + * provided codec + */ + public void testUnsorted(LongCODEC codec) { + int[] lengths = { 133, 1026, 1333333 }; + for (int N : lengths) { + long[] data = new long[N]; + // initialize the data (most will be small) + for (int k = 0; k < N; k += 1) + data[k] = 3; + // throw some larger values + for (int k = 0; k < N; k += 5) + data[k] = 100; + for (int k = 0; k < N; k += 533) + data[k] = 10000; + data[5] = -311; + // could need more compressing + long[] compressed = new long[(int) Math.ceil(N * 1.01) + 1024]; + IntWrapper inputoffset = new IntWrapper(0); + IntWrapper outputoffset = new IntWrapper(0); + codec.compress(data, inputoffset, data.length, compressed, + outputoffset); + // we can repack the data: (optional) + compressed = Arrays.copyOf(compressed, outputoffset.intValue()); + + long[] recovered = new long[N]; + IntWrapper recoffset = new IntWrapper(0); + codec.uncompress(compressed, 
new IntWrapper(0), compressed.length, + recovered, recoffset); + assertArrayEquals(data, recovered); + } + } + + private void testUnsorted2(LongCODEC codec) { + long[] data = new long[128]; + data[5] = -1; + long[] compressed = new long[1024]; + IntWrapper inputoffset = new IntWrapper(0); + IntWrapper outputoffset = new IntWrapper(0); + codec.compress(data, inputoffset, data.length, compressed, outputoffset); + // we can repack the data: (optional) + compressed = Arrays.copyOf(compressed, outputoffset.intValue()); + + long[] recovered = new long[128]; + IntWrapper recoffset = new IntWrapper(0); + codec.uncompress(compressed, new IntWrapper(0), compressed.length, + recovered, recoffset); + assertArrayEquals(data, recovered); + } + + private void testUnsorted3(LongCODEC codec) { + long[] data = new long[128]; + data[127] = -1; + long[] compressed = new long[1024]; + IntWrapper inputoffset = new IntWrapper(0); + IntWrapper outputoffset = new IntWrapper(0); + codec.compress(data, inputoffset, data.length, compressed, outputoffset); + // we can repack the data: (optional) + compressed = Arrays.copyOf(compressed, outputoffset.intValue()); + + long[] recovered = new long[128]; + IntWrapper recoffset = new IntWrapper(0); + codec.uncompress(compressed, new IntWrapper(0), compressed.length, + recovered, recoffset); + assertArrayEquals(data, recovered); + } + + /** + * + */ + @Test + public void fastPforTest() { + // proposed by Stefan Ackermann (https://github.com/Stivo) + for (LongCODEC codec : codecs) { + int N = FastPFOR.BLOCK_SIZE; + long[] data = new long[N]; + for (int i = 0; i < N; i++) + data[i] = 0; + data[126] = -1; + long[] comp = LongTestUtils.compress(codec, Arrays.copyOf(data, N)); + long[] answer = LongTestUtils.uncompress(codec, comp, N); + for (int k = 0; k < N; ++k) + if (answer[k] != data[k]) { + long[] comp2 = LongTestUtils.compress(codec, Arrays.copyOf(data, N)); + long[] answer2 = LongTestUtils.uncompress(codec, comp2, N); + throw new 
RuntimeException("bug " + k + " " + answer[k] + + " != " + data[k]); + } + } + } + + /** + * + */ + @Test + public void fastPfor128Test() { + // proposed by Stefan Ackermann (https://github.com/Stivo) + for (LongCODEC codec : codecs) { + int N = FastPFOR128.BLOCK_SIZE; + long[] data = new long[N]; + for (int i = 0; i < N; i++) + data[i] = 0; + data[126] = -1; + long[] comp = LongTestUtils.compress(codec, Arrays.copyOf(data, N)); + long[] answer = LongTestUtils.uncompress(codec, comp, N); + for (int k = 0; k < N; ++k) + if (answer[k] != data[k]) + throw new RuntimeException("bug " + k + " " + answer[k] + + " != " + data[k]); + } + } + +} diff --git a/src/test/java/me/lemire/longcompression/LongDeltaTest.java b/src/test/java/me/lemire/longcompression/LongDeltaTest.java new file mode 100644 index 0000000..bfa1e6f --- /dev/null +++ b/src/test/java/me/lemire/longcompression/LongDeltaTest.java @@ -0,0 +1,23 @@ +package me.lemire.longcompression; + +import me.lemire.longcompression.differential.LongDelta; +import org.junit.Test; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertNotNull; + +public class LongDeltaTest { + @Test + public void testEmptyArrayFastInverseDelta() { + LongCompressor compressor = new LongCompressor(); + long[] input = new long[0]; + + LongDelta.delta(input); + long[] compressed = compressor.compress(input); + long[] result = compressor.uncompress(compressed); + LongDelta.fastinverseDelta(result); + + assertNotNull(result); + assertArrayEquals(input, result); + } +} diff --git a/src/test/java/me/lemire/longcompression/LongTestUtils.java b/src/test/java/me/lemire/longcompression/LongTestUtils.java new file mode 100644 index 0000000..b7d9c63 --- /dev/null +++ b/src/test/java/me/lemire/longcompression/LongTestUtils.java @@ -0,0 +1,133 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertTrue; + +import java.util.Arrays; + +import me.lemire.integercompression.IntWrapper; + +/** + * Static utility methods for test. + */ +public class LongTestUtils { + + protected static void dumpIntArray(long[] data, String label) { + System.out.print(label); + for (int i = 0; i < data.length; ++i) { + if (i % 6 == 0) { + System.out.println(); + } + System.out.format(" %1$11d", data[i]); + } + System.out.println(); + } + + protected static void dumpIntArrayAsHex(long[] data, String label) { + System.out.print(label); + for (int i = 0; i < data.length; ++i) { + if (i % 8 == 0) { + System.out.println(); + } + System.out.format(" %1$08X", data[i]); + } + System.out.println(); + } + + /** + * Check that compress and uncompress keep original array. + * + * @param codec CODEC to test. + * @param orig original integers + */ + public static void assertSymmetry(LongCODEC codec, long... orig) { + // There are some cases that compressed array is bigger than original + // array. So output array for compress must be larger. + // + // Example: + // - VariableByte compresses an array like [ -1 ]. + // - Composition compresses a short array. + final int EXTEND = 1; + + long[] compressed = new long[orig.length + EXTEND]; + IntWrapper c_inpos = new IntWrapper(0); + IntWrapper c_outpos = new IntWrapper(0); + codec.compress(orig, c_inpos, orig.length, compressed, + c_outpos); + + assertTrue(c_outpos.get() <= orig.length + EXTEND); + + // Uncompress an array. + long[] uncompressed = new long[orig.length]; + IntWrapper u_inpos = new IntWrapper(0); + IntWrapper u_outpos = new IntWrapper(0); + codec.uncompress(compressed, u_inpos, c_outpos.get(), + uncompressed, u_outpos); + + // Compare between uncompressed and orig arrays. 
+ long[] target = Arrays.copyOf(uncompressed, u_outpos.get()); + assertArrayEquals(orig, target); + } + + protected static long[] compress(LongCODEC codec, long[] data) { + long[] outBuf = new long[data.length * 8]; + IntWrapper inPos = new IntWrapper(); + IntWrapper outPos = new IntWrapper(); + codec.compress(data, inPos, data.length, outBuf, outPos); + return Arrays.copyOf(outBuf, outPos.get()); + } + + protected static long[] uncompress(LongCODEC codec, long[] data, int len) { + long[] outBuf = new long[len + 1024]; + IntWrapper inPos = new IntWrapper(); + IntWrapper outPos = new IntWrapper(); + codec.uncompress(data, inPos, data.length, outBuf, outPos); + return Arrays.copyOf(outBuf, outPos.get()); + } + + + + protected static byte[] compress(ByteLongCODEC codec, long[] data) { + byte[] outBuf = new byte[data.length * 4 * 4]; + IntWrapper inPos = new IntWrapper(); + IntWrapper outPos = new IntWrapper(); + codec.compress(data, inPos, data.length, outBuf, outPos); + return Arrays.copyOf(outBuf, outPos.get()); + } + + protected static long[] uncompress(ByteLongCODEC codec, byte[] data, int len) { + long[] outBuf = new long[len + 1024]; + IntWrapper inPos = new IntWrapper(); + IntWrapper outPos = new IntWrapper(); + codec.uncompress(data, inPos, data.length, outBuf, outPos); + return Arrays.copyOf(outBuf, outPos.get()); + } + + protected static long[] compressHeadless(SkippableLongCODEC codec, long[] data) { + long[] outBuf = new long[codec.maxHeadlessCompressedLength(new IntWrapper(0), data.length)]; + IntWrapper inPos = new IntWrapper(); + IntWrapper outPos = new IntWrapper(); + codec.headlessCompress(data, inPos, data.length, outBuf, outPos); + return Arrays.copyOf(outBuf, outPos.get()); + } + + protected static long[] uncompressHeadless(SkippableLongCODEC codec, long[] data, int len) { + long[] outBuf = new long[len + 1024]; + IntWrapper inPos = new IntWrapper(); + IntWrapper outPos = new IntWrapper(); + codec.headlessUncompress(data, inPos, data.length, 
outBuf, outPos,len); + if(outPos.get() < len) throw new RuntimeException("Insufficient output."); + return Arrays.copyOf(outBuf, outPos.get()); + } + + public static String longToBinaryWithLeading(long l) { + return String.format("%64s", Long.toBinaryString(l)).replace(' ', '0'); + } +} diff --git a/src/test/java/me/lemire/longcompression/SkippableLongBasicTest.java b/src/test/java/me/lemire/longcompression/SkippableLongBasicTest.java new file mode 100644 index 0000000..c4b7e01 --- /dev/null +++ b/src/test/java/me/lemire/longcompression/SkippableLongBasicTest.java @@ -0,0 +1,194 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +import java.util.Arrays; + +import org.junit.Test; + +import me.lemire.integercompression.IntWrapper; +import me.lemire.integercompression.TestUtils; +import me.lemire.integercompression.VariableByte; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertTrue; + +/** + * Just some basic sanity tests. 
+ * + * @author Benoit Lacelle + */ +@SuppressWarnings({ "static-method" }) +public class SkippableLongBasicTest { + final SkippableLongCODEC[] codecs = { + new LongJustCopy(), + new LongVariableByte(), + new SkippableLongComposition(new LongBinaryPacking(), new LongVariableByte()), }; + + + /** + * + */ + @Test + public void consistentTest() { + int N = 4096; + long[] data = new long[N]; + long[] rev = new long[N]; + for (int k = 0; k < N; ++k) + data[k] = k % 128; + for (SkippableLongCODEC c : codecs) { + System.out.println("[SkippeableBasicTest.consistentTest] codec = " + + c); + for (int n = 0; n <= N; ++n) { + IntWrapper inPos = new IntWrapper(); + IntWrapper outPos = new IntWrapper(); + long[] outBuf = new long[c.maxHeadlessCompressedLength(new IntWrapper(0), n)]; + + c.headlessCompress(data, inPos, n, outBuf, outPos); + + IntWrapper inPoso = new IntWrapper(); + IntWrapper outPoso = new IntWrapper(); + c.headlessUncompress(outBuf, inPoso, outPos.get(), rev, + outPoso, n); + if (outPoso.get() != n) { + throw new RuntimeException("bug "+n); + } + if (inPoso.get() != outPos.get()) { + throw new RuntimeException("bug "+n+" "+inPoso.get()+" "+outPos.get()); + } + for (int j = 0; j < n; ++j) + if (data[j] != rev[j]) { + throw new RuntimeException("bug"); + } + } + } + } + + + /** + * + */ + @Test + public void varyingLengthTest() { + int N = 4096; + long[] data = new long[N]; + for (int k = 0; k < N; ++k) + data[k] = k; + for (SkippableLongCODEC c : codecs) { + System.out.println("[SkippeableBasicTest.varyingLengthTest] codec = "+c); + for (int L = 1; L <= 128; L++) { + long[] comp = LongTestUtils.compressHeadless(c, Arrays.copyOf(data, L)); + long[] answer = LongTestUtils.uncompressHeadless(c, comp, L); + for (int k = 0; k < L; ++k) + if (answer[k] != data[k]) + throw new RuntimeException("bug "+c.toString()+" "+k+" "+answer[k]+" "+data[k]); + } + for (int L = 128; L <= N; L *= 2) { + long[] comp = LongTestUtils.compressHeadless(c, Arrays.copyOf(data, L)); + 
long[] answer = LongTestUtils.uncompressHeadless(c, comp, L); + for (int k = 0; k < L; ++k) + if (answer[k] != data[k]) + throw new RuntimeException("bug"); + } + + } + } + + /** + * + */ + @Test + public void varyingLengthTest2() { + int N = 128; + long[] data = new long[N]; + data[127] = -1; + for (SkippableLongCODEC c : codecs) { + System.out.println("[SkippeableBasicTest.varyingLengthTest2] codec = "+c); + + try { + // CODEC Simple9 is limited to "small" integers. + if (c.getClass().equals( + Class.forName("me.lemire.integercompression.Simple9"))) + continue; + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } + try { + // CODEC Simple16 is limited to "small" integers. + if (c.getClass().equals( + Class.forName("me.lemire.integercompression.Simple16"))) + continue; + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } + for (int L = 1; L <= 128; L++) { + long[] comp = LongTestUtils.compressHeadless(c, Arrays.copyOf(data, L)); + long[] answer = LongTestUtils.uncompressHeadless(c, comp, L); + for (int k = 0; k < L; ++k) + if (answer[k] != data[k]) { + throw new RuntimeException("L=" + L + ": bug at k = "+k+" "+answer[k]+" "+data[k]+" for "+c.toString()); + } + } + for (int L = 128; L <= N; L *= 2) { + long[] comp = LongTestUtils.compressHeadless(c, Arrays.copyOf(data, L)); + long[] answer = LongTestUtils.uncompressHeadless(c, comp, L); + for (int k = 0; k < L; ++k) + if (answer[k] != data[k]) + throw new RuntimeException("bug"); + } + + } + } + + @Test + public void testMaxHeadlessCompressedLength() { + testMaxHeadlessCompressedLength(new LongJustCopy(), 128); + testMaxHeadlessCompressedLength(new LongBinaryPacking(), 16 * LongBinaryPacking.BLOCK_SIZE); + testMaxHeadlessCompressedLength(new LongVariableByte(), 128); + testMaxHeadlessCompressedLength(new SkippableLongComposition(new LongBinaryPacking(), new LongVariableByte()), 16 * LongBinaryPacking.BLOCK_SIZE + 10); + } + + private static void 
testMaxHeadlessCompressedLength(SkippableLongCODEC codec, int inlengthTo) { + for (int inlength = 0; inlength < inlengthTo; ++inlength) { + long[] input = new long[inlength]; + Arrays.fill(input, -1L); + + int maxOutputLength = codec.maxHeadlessCompressedLength(new IntWrapper(), inlength); + long[] output = new long[maxOutputLength]; + IntWrapper outPos = new IntWrapper(); + + codec.headlessCompress(input, new IntWrapper(), inlength, output, outPos); + // If we reach this point, no exception was thrown, which means the calculated output length was sufficient. + + assertTrue(maxOutputLength <= outPos.get() + 1); // +1 because SkippableLongComposition always adds one extra integer for the potential header + } + } + + @Test + public void testUncompressOutputOffset_SkippableLongComposition() { + for (int offset : new int[] {0, 1, 6}) { + SkippableLongComposition codec = new SkippableLongComposition(new LongBinaryPacking(), new LongVariableByte()); + + long[] input = { 2, 3, 4, 5 }; + long[] compressed = new long[codec.maxHeadlessCompressedLength(new IntWrapper(0), input.length)]; + long[] uncompressed = new long[offset + input.length]; + + IntWrapper inputOffset = new IntWrapper(0); + IntWrapper compressedOffset = new IntWrapper(0); + + codec.headlessCompress(input, inputOffset, input.length, compressed, compressedOffset); + + int compressedLength = compressedOffset.get(); + IntWrapper uncompressedOffset = new IntWrapper(offset); + compressedOffset = new IntWrapper(0); + codec.headlessUncompress(compressed, compressedOffset, compressedLength, uncompressed, uncompressedOffset, input.length); + + assertArrayEquals(input, Arrays.copyOfRange(uncompressed, offset, offset + input.length)); + } + } +} diff --git a/src/test/java/me/lemire/longcompression/TestLongAs2IntsCodec.java b/src/test/java/me/lemire/longcompression/TestLongAs2IntsCodec.java new file mode 100644 index 0000000..bddff2a --- /dev/null +++ b/src/test/java/me/lemire/longcompression/TestLongAs2IntsCodec.java @@ 
-0,0 +1,31 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Edge-cases having caused issue specifically with LongAs2IntsCodec, on top of the per-codec checks inherited from ATestLongCODEC. + * + * @author Benoit Lacelle + */ +public class TestLongAs2IntsCodec extends ATestLongCODEC { + final LongAs2IntsCodec codec = new LongAs2IntsCodec(); + + @Override + public LongCODEC getCodec() { + return codec; + } + + @Test + public void testCodec_intermediateHighPowerOfTwo() { + Assert.assertEquals(3, LongTestUtils.compress((LongCODEC) codec, new long[] { 1L << 42 }).length); + } + +} diff --git a/src/test/java/me/lemire/longcompression/TestLongBinaryPacking.java b/src/test/java/me/lemire/longcompression/TestLongBinaryPacking.java new file mode 100644 index 0000000..ecc3f2e --- /dev/null +++ b/src/test/java/me/lemire/longcompression/TestLongBinaryPacking.java @@ -0,0 +1,26 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +import org.junit.Ignore; + +/** + * Edge-cases having caused issue specifically with LongBinaryPacking. 
+ * + * @author Benoit Lacelle + */ +@Ignore("Parent class tests are not valid as LongBinaryPacking process by chunks of 64 longs") +public class TestLongBinaryPacking extends ATestLongCODEC { + final LongBinaryPacking codec = new LongBinaryPacking(); + + @Override + public LongCODEC getCodec() { + return codec; + } + +} diff --git a/src/test/java/me/lemire/longcompression/TestLongVariableByte.java b/src/test/java/me/lemire/longcompression/TestLongVariableByte.java new file mode 100644 index 0000000..3cb2a49 --- /dev/null +++ b/src/test/java/me/lemire/longcompression/TestLongVariableByte.java @@ -0,0 +1,40 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Edge-cases having caused issue specifically with LongVariableByte. + * + * @author Benoit Lacelle + */ +public class TestLongVariableByte extends ATestLongCODEC { + final LongVariableByte codec = new LongVariableByte(); + + @Override + public LongCODEC getCodec() { + return codec; + } + + @Test + public void testCodec_allBitWidths() { + for (int bitWidth = 0; bitWidth <= 64; bitWidth++) { + long value = bitWidth == 0 ? 0 : 1L << (bitWidth - 1); + + int expectedSizeInBytes = Math.max(1, (bitWidth + 6) / 7); + int expectedSizeInLongs = (expectedSizeInBytes > 8) ? 
2 : 1; + + Assert.assertEquals(expectedSizeInLongs, LongTestUtils.compress((LongCODEC) codec, new long[] { value }).length); + Assert.assertEquals(expectedSizeInBytes, LongTestUtils.compress((ByteLongCODEC) codec, new long[] { value }).length); + Assert.assertEquals(expectedSizeInLongs, + LongTestUtils.compressHeadless((SkippableLongCODEC) codec, new long[] { value }).length); + } + } +} diff --git a/src/test/java/me/lemire/longcompression/synth/LongClusteredDataGenerator.java b/src/test/java/me/lemire/longcompression/synth/LongClusteredDataGenerator.java new file mode 100644 index 0000000..c964f6f --- /dev/null +++ b/src/test/java/me/lemire/longcompression/synth/LongClusteredDataGenerator.java @@ -0,0 +1,91 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ +package me.lemire.longcompression.synth; + +import me.lemire.integercompression.synth.ClusteredDataGenerator; + +/** + * This class will generate lists of random longs based on the clustered + * model: + * + * Reference: Vo Ngoc Anh and Alistair Moffat. 2010. Index compression using + * 64-bit words. Softw. Pract. Exper.40, 2 (February 2010), 131-147. + * + * @author Benoit Lacelle + * @see ClusteredDataGenerator + */ +public class LongClusteredDataGenerator { + + final LongUniformDataGenerator unidg = new LongUniformDataGenerator(); + + /** + * Creating random array generator. 
+ */ + public LongClusteredDataGenerator() { + } + + void fillUniform(long[] array, int offset, int length, long Min, long Max) { + long[] v = this.unidg.generateUniform(length, Max - Min); + for (int k = 0; k < v.length; ++k) + array[k + offset] = Min + v[k]; + } + + void fillClustered(long[] array, int offset, int length, long Min, long Max) { + final long range = Max - Min; + if ((range == length) || (length <= 10)) { + fillUniform(array, offset, length, Min, Max); + return; + } + final long cut = length + / 2 + + ((range - length - 1 > 0) ? (long)this.unidg.rand + .nextDouble() * (range - length - 1) : 0); + final double p = this.unidg.rand.nextDouble(); + if (p < 0.25) { + fillUniform(array, offset, length / 2, Min, Min + cut); + fillClustered(array, offset + length / 2, length + - length / 2, Min + cut, Max); + } else if (p < 0.5) { + fillClustered(array, offset, length / 2, Min, Min + cut); + fillUniform(array, offset + length / 2, length - length + / 2, Min + cut, Max); + } else { + fillClustered(array, offset, length / 2, Min, Min + cut); + fillClustered(array, offset + length / 2, length + - length / 2, Min + cut, Max); + } + } + + /** + * generates randomly N distinct integers from 0 to Max. + * + * @param N + * number of integers to generate + * @param Max + * maximal value of the integers + * @return array containing the integers + */ + public long[] generateClustered(int N, long Max) { + long[] array = new long[N]; + fillClustered(array, 0, N, 0, Max); + return array; + } + + /** + * Little test program. 
+ * + * @param args + * arguments are ignored + */ + public static void main(final String[] args) { + long[] example = (new LongClusteredDataGenerator()) + .generateClustered(20, 1000); + for (int k = 0; k < example.length; ++k) + System.out.println(example[k]); + } + +} diff --git a/src/test/java/me/lemire/longcompression/synth/LongUniformDataGenerator.java b/src/test/java/me/lemire/longcompression/synth/LongUniformDataGenerator.java new file mode 100644 index 0000000..4aa797b --- /dev/null +++ b/src/test/java/me/lemire/longcompression/synth/LongUniformDataGenerator.java @@ -0,0 +1,125 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ +package me.lemire.longcompression.synth; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Random; +import java.util.Set; + +import org.roaringbitmap.longlong.Roaring64Bitmap; + +import me.lemire.integercompression.synth.UniformDataGenerator; + +/** + * This class will generate "uniform" lists of random longs. + * + * @author Benoit Lacelle + * @see UniformDataGenerator + */ +public class LongUniformDataGenerator { + /** + * construct generator of random arrays. + */ + public LongUniformDataGenerator() { + this.rand = new Random(); + } + + /** + * @param seed + * random seed + */ + public LongUniformDataGenerator(final int seed) { + this.rand = new Random(seed); + } + + /** + * generates randomly N distinct longs from 0 to Max. 
+ */ + long[] generateUniformHash(int N, long Max) { + if (N > Max) + throw new RuntimeException("not possible"); + long[] ans = new long[N]; + Set s = new HashSet<>(); + while (s.size() < N) + s.add((long) (this.rand.nextDouble() * Max)); + Iterator i = s.iterator(); + for (int k = 0; k < N; ++k) + ans[k] = i.next().longValue(); + Arrays.sort(ans); + return ans; + } + + /** + * output all longs from the range [0,Max) that are not in the array + */ + static long[] negate(long[] x, long Max) { + int newLength = saturatedCast(Max - x.length); + long[] ans = new long[newLength]; + int i = 0; + int c = 0; + for (int j = 0; j < x.length; ++j) { + long v = x[j]; + for (; i < v; ++i) + ans[c++] = i; + ++i; + } + while (c < ans.length) + ans[c++] = i++; + return ans; + } + + private static int saturatedCast(long toInt) { + if (toInt > Integer.MAX_VALUE) { + return Integer.MAX_VALUE; + } else { + return (int) toInt; + } + } + + /** + * generates randomly N distinct longs from 0 to Max. + * + * @param N + * number of longs to generate + * @param Max + * bound on the value of longs + * @return an array containing randomly selected longs + */ + public long[] generateUniform(int N, long Max) { + assert N >= 0; + assert Max >= 0; + if (N * 2 > Max) { + return negate(generateUniform(saturatedCast(Max - N), Max), Max); + } + if (2048 * N > Max) + return generateUniformBitmap(N, Max); + return generateUniformHash(N, Max); + } + + /** + * generates randomly N distinct longs from 0 to Max. + */ + long[] generateUniformBitmap(int N, long Max) { + if (N > Max) + throw new RuntimeException("not possible"); + Roaring64Bitmap bs = new Roaring64Bitmap(); + int cardinality = 0; + while (cardinality < N) { + long v = (long) (rand.nextDouble() * Max); + if (!bs.contains(v)) { + bs.add(v); + cardinality++; + } + } + return bs.toArray(); + } + + Random rand = new Random(); + +} \ No newline at end of file