Support exact mode for lossless

Before this change, WebP2 could only store premultiplied internal
samples (meaning every color channel is multiplied by the alpha
channel for better compression). WebP2 can now be truly lossless
with translucent images.
Add test_exact.cc

Change-Id: If6edab7f383d2953a28787a5b92579ce21722a68
Reviewed-on: https://chromium-review.googlesource.com/c/codecs/libwebp2/+/4249957
Tested-by: WebM Builds <builds@webmproject.org>
Reviewed-by: Maryla Ustarroz-Calonge <maryla@google.com>
diff --git a/examples/cwp2.cc b/examples/cwp2.cc
index 12d0c3a..75aba8d 100644
--- a/examples/cwp2.cc
+++ b/examples/cwp2.cc
@@ -96,6 +96,7 @@
   opt.Add("-alpha_q <float>",
           SPrintf("alpha quality factor (0:small..100:big), default=%1.1f",
                   WP2::EncoderConfig::kDefault.alpha_quality));
+  opt.Add("-exact", "No premultiplication is done in lossless.");
   opt.Add("-target_size <int>", "target size (in bytes)");
   opt.Add("-target_psnr <float>", "target PSNR (in dB. typically: 42)");
   opt.Add("-effort <int>",
@@ -594,6 +595,8 @@
     } else if (!strcmp(argv[c], "-alpha_q")) {
       NEED_ARGS(1);
       params.config.alpha_quality = ExUtilGetFloat(argv[++c], &parse_error);
+    } else if (!strcmp(argv[c], "-exact")) {
+      params.config.exact = true;
     } else if (!strcmp(argv[c], "-csp")) {
       NEED_ARGS(1);
       params.config.csp_type = (WP2::Csp)ExUtilGetInt(argv[++c], &parse_error);
diff --git a/examples/dwp2.cc b/examples/dwp2.cc
index 12650de..2d6961a 100644
--- a/examples/dwp2.cc
+++ b/examples/dwp2.cc
@@ -17,6 +17,7 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
+#include <algorithm>
 #include <cstdio>
 #include <cstring>
 #include <string>
@@ -30,7 +31,6 @@
 #include "imageio/image_dec.h"
 #include "imageio/image_enc.h"
 #include "imageio/imageio_util.h"
-#include "src/common/color_precision.h"
 #include "src/utils/thread_utils.h"
 
 namespace {
@@ -89,6 +89,7 @@
   opt.Add("-force",
           "overwrite destination if it exists (default=off unless out_file is "
           "explicit)");
+  opt.Add("-exact", "No premultiplication is done in lossless.");
   opt.Add("-info", "just print bitstream info and exit");
   opt.Add("-v", "verbose (e.g. print encoding/decoding times)");
   opt.Add("-progress", "display decoding progression");
@@ -265,7 +266,8 @@
       const std::string& in_path, std::string* const standard_out,
       std::string* const standard_err,
       WP2::ProgressHook* const progress_hook) {
-    WP2::ArgbBuffer output_buffer(WP2_Argb_32);
+    WP2::ArgbBuffer output_buffer(settings_.config.exact ? WP2_ARGB_32
+                                                         : WP2_Argb_32);
 
     WP2::Data data;
     WP2_CHECK_STATUS(WP2::IoUtilReadFile(in_path.c_str(), &data));
@@ -473,6 +475,8 @@
       settings.output_frames_to_directory = true;
     } else if (!strcmp(argv[c], "-force")) {
       settings.allow_overwrite = true;
+    } else if (!strcmp(argv[c], "-exact")) {
+      settings.config.exact = true;
     } else if (!strcmp(argv[c], "-info")) {
       settings.print_info = true;
     } else if (!strcmp(argv[c], "-vdebug") && c + 1 < argc) {
diff --git a/imageio/pngenc.cc b/imageio/pngenc.cc
index 0456cc5..08554b4 100644
--- a/imageio/pngenc.cc
+++ b/imageio/pngenc.cc
@@ -177,7 +177,8 @@
 WP2Status WritePNG(const ArgbBuffer& buffer, FILE* fout,
                    const char*, bool, bool, size_t*) {
   WP2_CHECK_OK(fout != nullptr, WP2_STATUS_NULL_PARAMETER);
-  WP2_CHECK_OK(buffer.format() == WP2_Argb_32, WP2_STATUS_UNSUPPORTED_FEATURE);
+  WP2_CHECK_OK(buffer.format() == WP2_Argb_32 || buffer.format() == WP2_ARGB_32,
+               WP2_STATUS_UNSUPPORTED_FEATURE);
 
   const uint32_t width = buffer.width();
   const uint32_t height = buffer.height();
@@ -210,8 +211,14 @@
 
   WP2ArgbConverterInit();
   for (png_uint_32 y = 0; y < height; ++y) {
-    WP2ArgbConvertTo[has_alpha ? WP2_RGBA_32 : WP2_RGB_24](row, width,
-                                                           ARGB.data());
+    if (buffer.format() == WP2_Argb_32) {
+      WP2ArgbConvertTo[has_alpha ? WP2_RGBA_32 : WP2_RGB_24](row, width,
+                                                             ARGB.data());
+    } else {
+      assert(buffer.format() == WP2_ARGB_32);
+      WP2ARGBConvertTo[has_alpha ? WP2_RGBA_32 : WP2_RGB_24](row, width,
+                                                             ARGB.data());
+    }
     png_bytep ptr[1] = {ARGB.data()};
     png_write_rows(png, ptr, 1);
     row += buffer.stride();
diff --git a/src/dec/bypass_dec.cc b/src/dec/bypass_dec.cc
index 18f79a1..05a231c 100644
--- a/src/dec/bypass_dec.cc
+++ b/src/dec/bypass_dec.cc
@@ -17,6 +17,8 @@
 //
 // Author: Yannis Guyon (yguyon@google.com)
 
+#include <cstring>
+
 #include "src/common/global_params.h"
 #include "src/common/header_enc_dec.h"
 #include "src/common/lossy/block.h"
@@ -87,7 +89,8 @@
     DataSource::DataHandle handle;
     WP2_CHECK_OK(tile_->input->TryReadNext(bytes_per_row, &handle),
                  WP2_STATUS_NOT_ENOUGH_DATA);
-    if (tile_->rgb_output.format() == WP2_Argb_32) {
+    if (tile_->rgb_output.format() == WP2_Argb_32 ||
+        tile_->rgb_output.format() == WP2_ARGB_32) {
       uint8_t* const row = tile_->rgb_output.GetRow8(y);
       if (gparams_->has_alpha_) {
         std::memcpy(row, handle.GetBytes(), handle.GetSize());
diff --git a/src/dec/lossless/losslessi_dec.cc b/src/dec/lossless/losslessi_dec.cc
index c31069d..9ba5e31 100644
--- a/src/dec/lossless/losslessi_dec.cc
+++ b/src/dec/lossless/losslessi_dec.cc
@@ -185,7 +185,7 @@
 // Emit rows without any scaling.
 template <typename T>
 uint32_t EmitRows(const int16_t* row_in, uint32_t mb_w, uint32_t mb_h,
-                  bool has_alpha, bool premultiply, T* const out,
+                  bool has_alpha, bool exact, bool premultiply, T* const out,
                   uint32_t out_stride) {
   int lines = mb_h;
   T* row_out = out;
@@ -195,12 +195,14 @@
       const uint8_t alpha = (uint8_t)row_in[0];
       if (!has_alpha) assert(alpha == WP2::kAlphaMax);
       row_out[i] = alpha;
-      if (alpha > 0) {
+      if (exact || (alpha > 0 && !premultiply)) {
         for (int j = 1; j <= 3; ++j) {
-          row_out[i + j] = premultiply
-                               ? static_cast<T>(WP2::DivBy255(
-                                     static_cast<uint32_t>(row_in[j]) * alpha))
-                               : static_cast<T>(row_in[j]);
+          row_out[i + j] = static_cast<T>(row_in[j]);
+        }
+      } else if (alpha > 0 && premultiply) {
+        for (int j = 1; j <= 3; ++j) {
+          row_out[i + j] = static_cast<T>(
+              WP2::DivBy255(static_cast<uint32_t>(row_in[j]) * alpha));
         }
       } else {
         // To optimize encoding, the encoder can store arbitrary rgb values when
@@ -310,13 +312,13 @@
 
     uint32_t num_rows_out;
     if (WP2Formatbpc(buf->format()) <= 8) {
-      num_rows_out =
-          EmitRows(rows_data, buf->width(), num_rows, gparams_->has_alpha_,
-                   premultiply_, buf->GetRow8(last_out_row_), buf->stride());
+      num_rows_out = EmitRows(rows_data, buf->width(), num_rows,
+                              gparams_->has_alpha_, config_.exact, premultiply_,
+                              buf->GetRow8(last_out_row_), buf->stride());
     } else {
-      num_rows_out =
-          EmitRows(rows_data, buf->width(), num_rows, gparams_->has_alpha_,
-                   premultiply_, buf->GetRow16(last_out_row_), buf->stride());
+      num_rows_out = EmitRows(rows_data, buf->width(), num_rows,
+                              gparams_->has_alpha_, config_.exact, premultiply_,
+                              buf->GetRow16(last_out_row_), buf->stride());
     }
     // Update 'last_out_row_'.
     last_out_row_ += num_rows_out;
@@ -614,7 +616,8 @@
 
 static WP2Status ReadPaletteVerbatim(uint32_t num_colors,
                                      WP2SampleFormat format, bool has_alpha,
-                                     bool is_grayscale, WP2::ANSDec* const dec,
+                                     bool exact, bool is_grayscale,
+                                     WP2::ANSDec* const dec,
                                      Transform* const transform) {
   WP2_CHECK_ALLOC_OK(transform->data_.resize(4 * num_colors));
   uint32_t channel_max[4];
@@ -625,23 +628,19 @@
     if (has_alpha) {
       transform->data_[4 * i + 0] =
           dec->ReadRValue(channel_max[0] + 1, "color_verbatim");
-      for (uint32_t c = 1; c < 4; ++c) {
-        const uint32_t max = WP2::DivRound(
-            (uint32_t)(channel_max[c] * transform->data_[4 * i + 0]),
-            channel_max[0]);
-        transform->data_[4 * i + c] =
-            (is_grayscale && c > 1)
-                ? transform->data_[4 * i + 1]
-                : dec->ReadRValue(max + 1, "color_verbatim");
-      }
     } else {
       transform->data_[4 * i + 0] = channel_max[0];
-      for (uint32_t c = 1; c < 4; ++c) {
-        transform->data_[4 * i + c] =
-            (is_grayscale && c > 1)
-                ? transform->data_[4 * i + 1]
-                : dec->ReadRValue(channel_max[c] + 1, "color_verbatim");
-      }
+    }
+    for (uint32_t c = 1; c < 4; ++c) {
+      const uint32_t max =
+          (has_alpha && !exact)
+              ? WP2::DivRound(
+                    (uint32_t)(channel_max[c] * transform->data_[4 * i + 0]),
+                    channel_max[0])
+              : channel_max[c];
+      transform->data_[4 * i + c] =
+          (is_grayscale && c > 1) ? transform->data_[4 * i + 1]
+                                  : dec->ReadRValue(max + 1, "color_verbatim");
     }
   }
   return dec->GetStatus();
@@ -662,7 +661,7 @@
   if (num_colors_ <= kSmallPaletteLimit) {
     WP2_CHECK_STATUS(ReadPaletteVerbatim(
         num_colors_, hdr_.symbols_info_.SampleFormat(), gparams_->has_alpha_,
-        is_grayscale, dec_, transform));
+        config_.exact, is_grayscale, dec_, transform));
     dec_->PopDebugPrefix();
     return WP2_STATUS_OK;
   }
@@ -713,7 +712,9 @@
         }
         const int16_t alpha = data[4 * i + 0];
         int16_t channel_prev = data[4 * (i - 1) + c];
-        if (c > 0 && channel_prev > alpha) channel_prev = alpha;
+        if (c > 0 && channel_prev > alpha && !config_.exact) {
+          channel_prev = alpha;
+        }
         const auto channel_diff = data[4 * i + c];
         if (is_positive[c][i - 1]) {
           data[4 * i + c] = channel_prev + channel_diff;
@@ -739,7 +740,8 @@
       }
       const bool use_vector = dec_->ReadBool("use_vector");
       // Read the first channel.
-      const uint32_t max_first = (c == 0) ? channel_max : transform->data_[0];
+      const uint32_t max_first =
+          (c == 0 || config_.exact) ? channel_max : transform->data_[0];
       int16_t channel = dec_->ReadRValue(max_first + 1, "first_channel");
       transform->data_[c] = channel;
 
@@ -752,7 +754,7 @@
         for (uint32_t i = 1; i < num_colors_; ++i) {
           if (gparams_->has_alpha_) {
             const int16_t alpha = transform->data_[4 * i + 0];
-            if (c > 0 && channel > alpha) channel = alpha;
+            if (c > 0 && channel > alpha && !config_.exact) channel = alpha;
           }
           channel = WP2::Clamp<int32_t>(
               channel + (is_positive[c][i - 1] ? diffs[i - 1] : -diffs[i - 1]),
@@ -763,11 +765,15 @@
         for (uint32_t i = 1; i < num_colors_; ++i) {
           const int16_t alpha = transform->data_[4 * i + 0];
           assert(alpha >= 0 && alpha <= (int16_t)alpha_max);
-          if (gparams_->has_alpha_ && c > 0 && channel > alpha) channel = alpha;
+          if (gparams_->has_alpha_ && c > 0 && channel > alpha &&
+              !config_.exact) {
+            channel = alpha;
+          }
           if (is_positive[c][i - 1]) {
             const uint32_t max =
-                (c == 0) ? channel_max
-                         : WP2::DivRound(channel_max * alpha, alpha_max);
+                (c == 0 || config_.exact)
+                    ? channel_max
+                    : WP2::DivRound(channel_max * alpha, alpha_max);
             channel += dec_->ReadRValue(max + 1 - channel, "store_diff");
           } else {
             channel -= dec_->ReadRValue(channel + 1, "store_diff");
@@ -1026,6 +1032,7 @@
         RegisterTransformForVDebug(transforms_[i], config_.info));
   }
   premultiply_ = (gparams_->has_alpha_ ? dec_->ReadBool("premultiply") : false);
+  if (config_.exact) premultiply_ = false;
 
   // Modify the main image ranges if we only use the palette.
   if (num_transforms == 1 && transforms[0] == TransformType::kColorIndexing) {
diff --git a/src/dec/main_dec.cc b/src/dec/main_dec.cc
index 5087fb7..173671e 100644
--- a/src/dec/main_dec.cc
+++ b/src/dec/main_dec.cc
@@ -18,7 +18,10 @@
 // Author: Skal (pascal.massimino@gmail.com)
 
 #include <algorithm>
+#include <array>
 #include <cstddef>
+#include <string>
+#include <vector>
 
 #include "src/common/color_precision.h"
 #include "src/common/progress_watcher.h"
@@ -441,7 +444,9 @@
                  const DecoderConfig& config) {
   WP2_CHECK_OK(output != nullptr, WP2_STATUS_NULL_PARAMETER);
   const WP2SampleFormat format_default =
-      (WP2Formatbpc(output->format()) <= 8) ? WP2_Argb_32 : WP2_Argb_38;
+      (WP2Formatbpc(output->format()) <= 8)
+          ? (config.exact ? WP2_ARGB_32 : WP2_Argb_32)
+          : WP2_Argb_38;
   ArgbBuffer tmp_output(format_default);
   ArgbBuffer* real_output = nullptr;
 
diff --git a/src/enc/lossless/analysisl_enc.cc b/src/enc/lossless/analysisl_enc.cc
index 5271f13..2cb003b 100644
--- a/src/enc/lossless/analysisl_enc.cc
+++ b/src/enc/lossless/analysisl_enc.cc
@@ -205,7 +205,7 @@
     WP2_CHECK_ALLOC_OK(configs->reserve(1));
     for (EntropyMode mode :
          {EntropyMode::kPalette, EntropyMode::kPaletteSpatial}) {
-      AddConfig(mode, /*use_premultiplied=*/true,
+      AddConfig(mode, /*use_premultiplied=*/!encoder.config_.exact,
                 /*red_and_blue_always_zero=*/true, /*entropy=*/0.f, effort,
                 configs);
     }
@@ -496,13 +496,15 @@
   // Do not use a palette when there is only one color as ANS will optimize it
   // anyway.
   const bool can_use_palette = (encoder->palette_.Size() > 1);
+  const bool can_premultiply =
+      (!encoder->config_.exact && encoder->has_alpha_ &&
+       !WP2IsPremultiplied(encoder->pic_.format()));
 
   // Try LZW within the same palette size range condition as Group4.
   if (encoder->palette_.Size() >= 2 && encoder->palette_.Size() < 256) {
     WP2_CHECK_ALLOC_OK(configs->resize(configs->size() + 1));
     configs->back().transforms = FindTransformTypes(TransformType::kLZW);
-    configs->back().use_premultiplied =
-        (encoder->has_alpha_ && !WP2IsPremultiplied(encoder->pic_.format()));
+    configs->back().use_premultiplied = can_premultiply;
   }
 
   // Do not use Group4 for big palettes for CPU reasons and also because it is
@@ -531,8 +533,6 @@
       // AnalyzeEntropy is somewhat slow. 'red_and_blue_always_zero' is unused
       // for effort == 0.
       configs->clear();
-      const bool can_premultiply =
-          (encoder->has_alpha_ && !WP2IsPremultiplied(encoder->pic_.format()));
       AddConfig(can_use_palette ? EntropyMode::kPalette
                                 : EntropyMode::kSpatialSubGreen,
                 can_premultiply, /*red_and_blue_always_zero=*/false,
@@ -575,15 +575,14 @@
   // Use the minimal-size method for palette storage by default.
   uint32_t configs_size = configs->size();
   for (uint32_t i = 0; i < configs_size; ++i) {
-    CrunchConfig& config = (*configs)[i];
     for (uint32_t t = 0; t < kPossibleTransformCombinationSize; ++t) {
-      if (config.transforms[t] == TransformType::kColorIndexing) {
-        config.palette_sorting_type = Palette::Sorting::kMinimalSize;
+      if ((*configs)[i].transforms[t] == TransformType::kColorIndexing) {
+        (*configs)[i].palette_sorting_type = Palette::Sorting::kMinimalSize;
         if (effort == 9) {
           // Try more methods for palette sorting.
-          WP2_CHECK_ALLOC_OK(configs->push_back(config));
+          WP2_CHECK_ALLOC_OK(configs->push_back((*configs)[i]));
           configs->back().palette_sorting_type = Palette::Sorting::kLuminance;
-          WP2_CHECK_ALLOC_OK(configs->push_back(config));
+          WP2_CHECK_ALLOC_OK(configs->push_back((*configs)[i]));
           configs->back().palette_sorting_type =
               Palette::Sorting::kModifiedZeng;
         }
diff --git a/src/enc/lossless/losslessi_enc.cc b/src/enc/lossless/losslessi_enc.cc
index cf51be6..dffecbb 100644
--- a/src/enc/lossless/losslessi_enc.cc
+++ b/src/enc/lossless/losslessi_enc.cc
@@ -22,10 +22,10 @@
 #include <algorithm>
 #include <cassert>
 #include <cstdlib>
+#include <limits>
 #include <memory>
 #include <string>
 
-#include "src/common/color_precision.h"
 #include "src/common/progress_watcher.h"
 #include "src/common/symbols.h"
 #include "src/dsp/lossless/encl_dsp.h"
@@ -34,8 +34,6 @@
 #include "src/enc/symbols_enc.h"
 #include "src/utils/ans.h"
 #include "src/utils/ans_utils.h"
-#include "src/utils/plane.h"
-#include "src/utils/vector.h"
 
 namespace WP2L {
 
@@ -651,9 +649,10 @@
     bool can_use_sub_modes, const ProgressRange& progress, uint32_t pred_bits,
     WP2::ANSEnc* const enc, WP2::ANSDictionaries* const dicts) {
   WP2::Vector_s8 sub_modes;
-  WP2_CHECK_STATUS(CreateResidualImage(
-      pred_bits, can_use_sub_modes, min_values, max_values, config_.effort,
-      &argb_buffer_, argb_scratch_, transform_data_, &sub_modes));
+  WP2_CHECK_STATUS(
+      CreateResidualImage(pred_bits, config_.exact, can_use_sub_modes,
+                          min_values, max_values, config_.effort, &argb_buffer_,
+                          argb_scratch_, transform_data_, &sub_modes));
 
   const WP2::ANSDebugPrefix prefix(enc, "predictor");
   enc->PutRange(pred_bits, kTransformBitsMin, kTransformBitsMax,
@@ -776,7 +775,7 @@
     int16_t* const dst_row = argb_buffer_.GetRow(y);
     if (WP2Formatbpc(pic_.format()) <= 8) {
       const uint8_t* const src_row = pic_.GetRow8(y);
-      if (can_premultiply) {
+      if (can_premultiply && !config_.exact) {
         for (uint32_t x = 0; x < width; ++x) {
           const uint8_t a = src_row[4 * x + 0];
           if (a == 0) {
@@ -1067,7 +1066,7 @@
 WP2Status Encoder::Allocate() {
   const uint32_t num_pixels = pic_.width() * pic_.height();
   WP2_CHECK_ALLOC_OK(hash_chain_.Allocate(num_pixels));
-  palette_.Init(symbols_info_.SampleFormat(), has_alpha_);
+  palette_.Init(symbols_info_.SampleFormat(), has_alpha_, config_.exact);
   ref_pool_.Init(num_pixels);
 
   return WP2_STATUS_OK;
diff --git a/src/enc/lossless/losslessi_enc.h b/src/enc/lossless/losslessi_enc.h
index 2e23c47..af98261 100644
--- a/src/enc/lossless/losslessi_enc.h
+++ b/src/enc/lossless/losslessi_enc.h
@@ -229,7 +229,7 @@
 // 'can_use_sub_modes' indicates whether sub-modes are allowed by the transform
 // and should be tried to find the best predictors.
 // If sub-modes are used, 'sub_modes' is not empty and contains the sub-modes.
-WP2Status CreateResidualImage(uint32_t bits, bool can_use_sub_modes,
+WP2Status CreateResidualImage(uint32_t bits, bool exact, bool can_use_sub_modes,
                               const int16_t min_values[4],
                               const int16_t max_values[4], int effort,
                               Buffer_s16* const buffer,
diff --git a/src/enc/lossless/palette.cc b/src/enc/lossless/palette.cc
index 2b6f649..30be4f4 100644
--- a/src/enc/lossless/palette.cc
+++ b/src/enc/lossless/palette.cc
@@ -25,7 +25,6 @@
 
 #include "src/common/color_precision.h"
 #include "src/common/lossless/color_cache.h"
-#include "src/dsp/lossless/encl_dsp.h"
 #include "src/enc/lossless/losslessi_enc.h"
 #include "src/utils/ans_utils.h"
 #include "src/wp2/format_constants.h"
@@ -139,12 +138,12 @@
 }
 
 template <typename T>
-uint32_t CountColors(const WP2::ArgbBuffer& pic, const T* src,
+uint32_t CountColors(const WP2::ArgbBuffer& pic, const T* src, bool exact,
                      ColorCacheMap* const color_cache,
                      const uint32_t max_palette_size) {
   assert(pic.format() == WP2_Argb_32 || pic.format() == WP2_Argb_38 ||
          pic.format() == WP2_ARGB_32);
-  const bool premultiply = !WP2IsPremultiplied(pic.format());
+  const bool premultiply = !exact && !WP2IsPremultiplied(pic.format());
   uint32_t num_colors = 0;
   // so we're sure that last_pix != argb[0]
   int16_t prev_pix[4] = {(int16_t)~src[0], 0, 0, 0};
@@ -188,11 +187,11 @@
   WP2_CHECK_STATUS(color_cache.Allocate(max_palette_size_));
 
   if (WP2Formatbpc(pic.format()) <= 8) {
-    num_colors_ =
-        CountColors(pic, pic.GetRow8(0), &color_cache, max_palette_size_);
+    num_colors_ = CountColors(pic, pic.GetRow8(0), exact_, &color_cache,
+                              max_palette_size_);
   } else {
-    num_colors_ =
-        CountColors(pic, pic.GetRow16(0), &color_cache, max_palette_size_);
+    num_colors_ = CountColors(pic, pic.GetRow16(0), exact_, &color_cache,
+                              max_palette_size_);
   }
 
   if (num_colors_ <= 1 || num_colors_ > max_palette_size_) {
@@ -212,7 +211,7 @@
 
 template <typename T>
 void ApplyT(const WP2::ArgbBuffer& pic, const T* src,
-            const ColorCacheMap& color_cache, int16_t* dst) {
+            const ColorCacheMap& color_cache, bool exact, int16_t* dst) {
   int16_t prev_pix[4] = {(int16_t)~src[0], (int16_t)~src[1], (int16_t)~src[2],
                          (int16_t)~src[3]};
   uint32_t prev_idx = 0;
@@ -220,7 +219,7 @@
     for (uint32_t x = 0; x < pic.width(); ++x, dst += 4) {
       int16_t pix[4] = {(int16_t)src[4 * x + 0], (int16_t)src[4 * x + 1],
                         (int16_t)src[4 * x + 2], (int16_t)src[4 * x + 3]};
-      if (!WP2IsPremultiplied(pic.format())) {
+      if (!exact && !WP2IsPremultiplied(pic.format())) {
         for (uint32_t c = 1; c < 4; ++c) {
           pix[c] = WP2::DivBy255(pix[c] * pix[0]);
         }
@@ -251,9 +250,9 @@
   }
 
   if (WP2Formatbpc(pic.format()) <= 8) {
-    ApplyT(pic, pic.GetRow8(0), color_cache, dst);
+    ApplyT(pic, pic.GetRow8(0), color_cache, exact_, dst);
   } else {
-    ApplyT(pic, pic.GetRow16(0), color_cache, dst);
+    ApplyT(pic, pic.GetRow16(0), color_cache, exact_, dst);
   }
 
   return WP2_STATUS_OK;
@@ -275,24 +274,25 @@
 
 class InternalParams {
  public:
-  InternalParams(WP2SampleFormat format, bool has_alpha, uint32_t channel_max,
-                 uint32_t num_channels, const WP2::Vector_s16& colors);
+  InternalParams(WP2SampleFormat format, bool has_alpha, bool exact,
+                 uint32_t channel_max, uint32_t num_channels,
+                 const WP2::Vector_s16& colors);
   // Allocates the needed space for analysis.
   WP2Status Init(const int16_t colors[]);
   // Stores the signs.
-  WP2Status StoreSigns(WP2::ANSEncBase* const bw);
+  WP2Status StoreSigns(WP2::ANSEncBase* bw);
   // Used for List method. It modifies stats_.
-  WP2Status WriteAsList(WP2::ANSEncBase* const enc);
+  WP2Status WriteAsList(WP2::ANSEncBase* enc);
   // Used for Image method.
   WP2Status WriteAsImage(const ProgressRange& progress,
-                         WP2::ANSDictionaries* const dict,
-                         Encoder* const encoder,
-                         WP2::ANSEncBase* const enc) const;
-  WP2Status WriteVerbatim(WP2::ANSEncBase* const enc) const;
+                         WP2::ANSDictionaries* dict, Encoder* encoder,
+                         WP2::ANSEncBase* enc) const;
+  WP2Status WriteVerbatim(WP2::ANSEncBase* enc) const;
 
  public:
   WP2SampleFormat format_;
   bool has_alpha_;
+  bool exact_;
   uint32_t channel_max_;
   uint32_t num_channels_;
   uint32_t num_colors_;
@@ -356,16 +356,14 @@
   for (uint32_t i = 0; i < num_colors_; ++i) {
     if (has_alpha_) {
       enc->PutRValue(colors_[4 * i + 0], channel_max[0] + 1, "color_verbatim");
-      for (uint32_t c = 1; c < num_channels_; ++c) {
-        const uint32_t max = WP2::DivRound(
-            (uint32_t)(channel_max[c] * colors_[4 * i + 0]), channel_max[0]);
-        enc->PutRValue(colors_[4 * i + c], max + 1, "color_verbatim");
-      }
-    } else {
-      for (uint32_t c = 1; c < num_channels_; ++c) {
-        enc->PutRValue(colors_[4 * i + c], channel_max[c] + 1,
-                       "color_verbatim");
-      }
+    }
+    for (uint32_t c = 1; c < num_channels_; ++c) {
+      const uint32_t max =
+          (has_alpha_ && !exact_)
+              ? WP2::DivRound((uint32_t)(channel_max[c] * colors_[4 * i + 0]),
+                              channel_max[0])
+              : channel_max[c];
+      enc->PutRValue(colors_[4 * i + c], max + 1, "color_verbatim");
     }
   }
   WP2_CHECK_STATUS(enc->GetStatus());
@@ -384,7 +382,7 @@
     const uint16_t channel_max = WP2::FormatMax(format_, c);
     const WP2::Vector_u16& diffs = channel_diffs_[c];
     uint16_t alpha_value = has_alpha_ ? channel_diffs_[0][0] : channel_max;
-    uint16_t prev_channel = diffs[0];
+    uint16_t channel_prev = diffs[0];
     // Compute the cost when storing everything as ranges.
     float cost_range = WP2Log2(channel_max + 1);
     for (uint32_t i = 1; i < num_colors_; ++i) {
@@ -393,21 +391,22 @@
       if (has_alpha_) {
         alpha_value += is_stored_as_above_[0][i] ?  channel_diffs_[0][i]
                                                  : -channel_diffs_[0][i];
-        if (c > 0 && prev_channel > alpha_value) {
-          prev_channel = alpha_value;
+        if (c > 0 && channel_prev > alpha_value && !exact_) {
+          channel_prev = alpha_value;
         }
       }
       if (is_stored_as_above_[c][i]) {
-        const uint32_t max =
-            (c == 0) ? channel_max : WP2::DivBy255(channel_max * alpha_value);
-        cost_range += WP2Log2(max + 1 - prev_channel);
-        assert(prev_channel <= max);
-        assert((int)prev_channel + (int)diffs[i] <= (int)channel_max);
-        prev_channel += diffs[i];
+        const uint32_t max = (c == 0 || exact_)
+                                 ? channel_max
+                                 : WP2::DivBy255(channel_max * alpha_value);
+        cost_range += WP2Log2(max + 1 - channel_prev);
+        assert(channel_prev <= max);
+        assert((int)channel_prev + (int)diffs[i] <= (int)channel_max);
+        channel_prev += diffs[i];
       } else {
-        cost_range += WP2Log2(prev_channel + 1);
-        assert(diffs[i] <= prev_channel);
-        prev_channel -= diffs[i];
+        cost_range += WP2Log2(channel_prev + 1);
+        assert(diffs[i] <= channel_prev);
+        channel_prev -= diffs[i];
       }
     }
     // Try using StoreVector in case values are monotonic.
@@ -417,7 +416,8 @@
     // Pick the best of the two solutions.
     const bool use_vector = cost_store_vector < cost_range;
     enc->PutBool(use_vector, "use_vector");
-    const uint32_t max_first = (c == 0) ? channel_max : channel_diffs_[0][0];
+    const uint32_t max_first =
+        (c == 0 || exact_) ? channel_max : channel_diffs_[0][0];
     enc->PutRValue(diffs[0], max_first + 1, "first_channel");
     if (use_vector) {
       enc->AddDebugPrefix("transform_index_vals");
@@ -426,22 +426,25 @@
     } else {
       // Store everything else as ranges.
       alpha_value = has_alpha_ ? channel_diffs_[0][0] : channel_max;
-      prev_channel = diffs[0];
+      channel_prev = diffs[0];
       for (uint32_t i = 1; i < num_colors_; ++i) {
         if (has_alpha_) {
           alpha_value += (is_stored_as_above_[0][i]) ?  channel_diffs_[0][i]
                                                      : -channel_diffs_[0][i];
-          if (c > 0 && prev_channel > alpha_value) prev_channel = alpha_value;
+          if (c > 0 && channel_prev > alpha_value && !exact_) {
+            channel_prev = alpha_value;
+          }
         }
         if (is_stored_as_above_[c][i]) {
-          const uint32_t max =
-              (c == 0) ? channel_max : WP2::DivBy255(channel_max * alpha_value);
-          enc->PutRValue(diffs[i], (uint32_t)(max + 1 - prev_channel),
+          const uint32_t max = (c == 0 || exact_)
+                                   ? channel_max
+                                   : WP2::DivBy255(channel_max * alpha_value);
+          enc->PutRValue(diffs[i], (uint32_t)(max + 1 - channel_prev),
                          "store_diff");
-          prev_channel += diffs[i];
+          channel_prev += diffs[i];
         } else {
-          enc->PutRValue(diffs[i], prev_channel + 1, "store_diff");
-          prev_channel -= diffs[i];
+          enc->PutRValue(diffs[i], channel_prev + 1, "store_diff");
+          channel_prev -= diffs[i];
         }
       }
     }
@@ -476,10 +479,12 @@
 }
 
 InternalParams::InternalParams(WP2SampleFormat format, bool has_alpha,
-                               uint32_t channel_max, uint32_t num_channels,
+                               bool exact, uint32_t channel_max,
+                               uint32_t num_channels,
                                const WP2::Vector_s16& colors)
     : format_(format),
       has_alpha_(has_alpha),
+      exact_(exact),
       channel_max_(channel_max),
       num_channels_(num_channels),
       num_colors_(colors.size() / 4),
@@ -503,7 +508,7 @@
     for (uint32_t i = 1; i < num_colors_; ++i) {
       if (has_alpha_) {
         const auto alpha = colors[4 * i + 0];
-        if (c > 0 && channel_prev > alpha) channel_prev = alpha;
+        if (c > 0 && channel_prev > alpha && !exact_) channel_prev = alpha;
       }
       const int16_t channel = colors[4 * i + c];
       const int diff = (int)channel - (int)channel_prev;
@@ -600,7 +605,7 @@
   const uint16_t channel_max = encoder->symbols_info_.GetMaxRange(kSymbolR) - 1;
   const uint32_t num_channels = (is_grayscale_ ? 2 : 4);
 
-  InternalParams params(format_, has_alpha_, channel_max, num_channels,
+  InternalParams params(format_, has_alpha_, exact_, channel_max, num_channels,
                         colors_);
 
   if (num_colors_ <= kSmallPaletteLimit) {
@@ -684,7 +689,7 @@
 
 template <typename T>
 WP2Status BuildCoOccurrence(const WP2::ArgbBuffer& pic, const T* src,
-                            const ColorCacheMap& color_cache,
+                            const ColorCacheMap& color_cache, bool exact,
                             CoOccurrenceMatrix* cooccurrence) {
   WP2::Vector_u16 lines;
   WP2_CHECK_ALLOC_OK(lines.resize(2 * pic.width()));
@@ -699,7 +704,7 @@
     for (uint32_t x = 0; x < pic.width(); ++x) {
       int16_t pix[4] = {(int16_t)src[4 * x + 0], (int16_t)src[4 * x + 1],
                         (int16_t)src[4 * x + 2], (int16_t)src[4 * x + 3]};
-      if (!WP2IsPremultiplied(pic.format())) {
+      if (!exact && !WP2IsPremultiplied(pic.format())) {
         for (uint32_t c = 1; c < 4; ++c) {
           pix[c] = WP2::DivBy255(pix[c] * pix[0]);
         }
@@ -738,11 +743,11 @@
   CoOccurrenceMatrix cooccurrence(num_colors_);
   WP2_CHECK_STATUS(cooccurrence.Allocate());
   if (WP2Formatbpc(pic.format()) <= 8) {
-    WP2_CHECK_STATUS(
-        BuildCoOccurrence(pic, pic.GetRow8(0), color_cache, &cooccurrence));
+    WP2_CHECK_STATUS(BuildCoOccurrence(pic, pic.GetRow8(0), color_cache, exact_,
+                                       &cooccurrence));
   } else {
-    WP2_CHECK_STATUS(
-        BuildCoOccurrence(pic, pic.GetRow16(0), color_cache, &cooccurrence));
+    WP2_CHECK_STATUS(BuildCoOccurrence(pic, pic.GetRow16(0), color_cache,
+                                       exact_, &cooccurrence));
   }
 
   // Initialize the mapping list with the two best indices.
@@ -773,7 +778,7 @@
   }
 
   // We need to append and prepend to the list of remapping. To this end, we
-  // actually define the nest start/end of the list as indices in a vector (with
+  // actually define the next start/end of the list as indices in a vector (with
   // a wrap around when the end is reached).
   uint32_t first = 0, last = 1;
   while (!sums.empty()) {
@@ -935,7 +940,7 @@
 
   const uint32_t num_channels = (is_grayscale_ ? 2 : 4);
   const uint16_t channel_max = encoder->symbols_info_.GetMaxRange(kSymbolR) - 1;
-  InternalParams params(format_, has_alpha_, channel_max, num_channels,
+  InternalParams params(format_, has_alpha_, exact_, channel_max, num_channels,
                         colors_);
 
   if (method_ == PaletteStorageMethod::kMethodVerbatim) {
diff --git a/src/enc/lossless/palette.h b/src/enc/lossless/palette.h
index 6066d26..917e7f0 100644
--- a/src/enc/lossless/palette.h
+++ b/src/enc/lossless/palette.h
@@ -21,11 +21,9 @@
 #define WP2_ENC_LOSSLESS_PALETTE_H_
 
 #include "src/common/progress_watcher.h"
-#include "src/utils/ans.h"
 #include "src/utils/ans_enc.h"
 #include "src/utils/vector.h"
 #include "src/wp2/base.h"
-#include "src/wp2/format_constants.h"
 
 namespace WP2L {
 
@@ -50,9 +48,10 @@
     kUndefined
   };
   Palette() : num_colors_(0) {}
-  void Init(WP2SampleFormat format, bool has_alpha) {
+  void Init(WP2SampleFormat format, bool has_alpha, bool exact) {
     format_ = format;
     has_alpha_ = has_alpha;
+    exact_ = exact;  // later passed to BuildCoOccurrence() and InternalParams
   }
   // Populates the palette with colors from the given 'pic'.
   // If the unique color count is more than kMaxPaletteSize (or if there is only
@@ -110,6 +109,7 @@
  private:
   WP2SampleFormat format_;
   bool has_alpha_ = false;
+  bool exact_ = false;             // true = do not premultiply by alpha
   bool is_grayscale_ = false;
   WP2::Vector_s16 colors_;
   uint32_t num_colors_ = 0;        // 0 = palette is disabled
diff --git a/src/enc/lossless/predictorl_enc.cc b/src/enc/lossless/predictorl_enc.cc
index 7b609ec..e8ab51e 100644
--- a/src/enc/lossless/predictorl_enc.cc
+++ b/src/enc/lossless/predictorl_enc.cc
@@ -168,8 +168,8 @@
 // predictions.
 static inline void GetResidual(int width, int16_t* const upper_row,
                                int16_t* const current_row, bool has_alpha,
-                               const Predictor& pred, int x_start, int x_end,
-                               int y, int16_t* const out) {
+                               bool exact, const Predictor& pred, int x_start,
+                               int x_end, int y, int16_t* const out) {
   PredictorNonClampedFunc pred_non_clamped = nullptr;
   PredictorClampedFunc pred_clamped = nullptr;
   PredictorAngleFunc pred_angle = nullptr;
@@ -219,7 +219,7 @@
       }
     }
     SubPixels(&current_row[4 * x], has_alpha, predict, residual);
-    if (current_row[4 * x] == 0) {
+    if (current_row[4 * x] == 0 && !exact) {
       // If alpha is 0, cleanup RGB. We can choose the RGB values of the
       // residual for best compression. The prediction of alpha itself can be
       // non-zero and must be kept though. We choose RGB of the residual to be
@@ -247,7 +247,7 @@
 
 // Returns best predictor and updates the accumulated histogram.
 static void GetBestPredictorForTile(
-    int tile_x, int tile_y, uint32_t bits, bool use_sub_modes,
+    int tile_x, int tile_y, uint32_t bits, bool exact, bool use_sub_modes,
     const int16_t min_values[4], const int16_t max_values[4],
     const Buffer_s16& buffer, const int16_t* const modes,
     int16_t* const argb_scratch, ChannelVector accumulated[2],
@@ -351,8 +351,8 @@
                         4 * (max_x + have_left + (y + 1 < height)),
                     &current_row[4 * context_start_x]);
 
-          GetResidual(width, upper_row, current_row, buffer.has_alpha, pred,
-                      start_x, start_x + max_x, y, residuals);
+          GetResidual(width, upper_row, current_row, buffer.has_alpha, exact,
+                      pred, start_x, start_x + max_x, y, residuals);
           UpdateHisto(residuals, max_x, &histo_argb);
         }
         float cur_diff =
@@ -378,7 +378,8 @@
 }
 
 // Converts pixels of the image to residuals with respect to predictions.
-static void CopyImageWithPrediction(int bits, const int16_t min_values[4],
+static void CopyImageWithPrediction(int bits, bool exact,
+                                    const int16_t min_values[4],
                                     const int16_t max_values[4],
                                     const int16_t* const modes, int effort,
                                     const WP2::Vector_s8& sub_modes,
@@ -404,8 +405,8 @@
           /*sub_mode_in=*/sub_modes.empty() ? 0 : sub_modes[ind + 2],
           min_values, max_values);
 
-      GetResidual(width, upper_row, current_row, buffer->has_alpha, pred, x,
-                  x_end, y, buffer->GetPosition(x, y));
+      GetResidual(width, upper_row, current_row, buffer->has_alpha, exact, pred,
+                  x, x_end, y, buffer->GetPosition(x, y));
       x = x_end;
     }
   }
@@ -413,7 +414,7 @@
 
 // Finds the best predictor for each tile, and converts the image to residuals
 // with respect to predictions.
-WP2Status CreateResidualImage(uint32_t bits, bool can_use_sub_modes,
+WP2Status CreateResidualImage(uint32_t bits, bool exact, bool can_use_sub_modes,
                               const int16_t min_values[4],
                               const int16_t max_values[4], int effort,
                               Buffer_s16* const buffer,
@@ -454,9 +455,10 @@
                         max_values);
     for (uint32_t tile_y = 0; tile_y < height; ++tile_y) {
       for (uint32_t tile_x = 0; tile_x < width; ++tile_x) {
-        GetBestPredictorForTile(
-            tile_x, tile_y, bits, can_use_sub_modes, min_values, max_values,
-            *buffer, modes, argb_scratch, histo, tmp_histo, &pred, &pred_wsub);
+        GetBestPredictorForTile(tile_x, tile_y, bits, exact, can_use_sub_modes,
+                                min_values, max_values, *buffer, modes,
+                                argb_scratch, histo, tmp_histo, &pred,
+                                &pred_wsub);
         const uint32_t ind = 4 * (tile_y * width + tile_x);
         for (uint32_t c : {0, 1, 3}) modes[ind + c] = 0;
         // Store the modes in the green channel.
@@ -501,7 +503,7 @@
     }
   }
 
-  CopyImageWithPrediction(bits, min_values, max_values, modes, effort,
+  CopyImageWithPrediction(bits, exact, min_values, max_values, modes, effort,
                           *sub_modes, argb_scratch, buffer);
   return WP2_STATUS_OK;
 }
diff --git a/src/enc/main_enc.cc b/src/enc/main_enc.cc
index 440a261..f6dd315 100644
--- a/src/enc/main_enc.cc
+++ b/src/enc/main_enc.cc
@@ -301,7 +301,7 @@
 
   WP2_CHECK_STATUS(SetupEncoderInfo(input.width(), input.height(), config));
 
-  const uint32_t rgb_bit_depth = (input.format() == WP2_Argb_38) ? 10 : 8;
+  const uint32_t rgb_bit_depth = WP2Formatbpc(input.format());
   const RGB12b preview_color = GetPreviewColor(input);
   const bool has_alpha = input.HasTransparency();
   const bool has_icc = (input.metadata_.iccp.size > 0);
@@ -406,7 +406,7 @@
   // TODO(yguyon): Some of these cases could also be done directly in YUV space
   //               instead of needing RGB conversion.
 
-  ArgbBuffer rgb_input(WP2_Argb_32);
+  ArgbBuffer rgb_input(config.exact ? WP2_ARGB_32 : WP2_Argb_32);
   YUVPlane yuv_input;
   CSPTransform csp_transform;
   if (rgb_is_needed) {
@@ -435,7 +435,7 @@
     WP2_CHECK_STATUS(yuv_input.FillPad(width, height));
   }
 
-  const uint32_t rgb_bit_depth = (rgb_input.format() == WP2_Argb_38) ? 10 : 8;
+  const uint32_t rgb_bit_depth = WP2Formatbpc(rgb_input.format());
   const RGB12b preview_color =
       yuv_input.IsEmpty() ? GetPreviewColor(rgb_input)
                           : GetPreviewColor(yuv_input, csp_transform);
diff --git a/src/enc/tile_enc.cc b/src/enc/tile_enc.cc
index 242fa0a..8582ff4 100644
--- a/src/enc/tile_enc.cc
+++ b/src/enc/tile_enc.cc
@@ -181,8 +181,7 @@
   }
   WP2_CHECK_STATUS(status);
   for (EncTile& tile : tiles_layout->tiles) {
-    const uint32_t rgb_bit_depth =
-        (tile.rgb_input.format() == WP2_Argb_38) ? 10 : 8;
+    const uint32_t rgb_bit_depth = WP2Formatbpc(tile.rgb_input.format());
     const uint32_t max_num_bytes =
         GetTileMaxNumBytes(rgb_bit_depth, *tiles_layout->gparams, tile.rect);
     assert((tile.enc.GetBitstreamSize() == 0) ^ (tile.data.size == 0));
diff --git a/src/wp2/decode.h b/src/wp2/decode.h
index c434930..43ecf27 100644
--- a/src/wp2/decode.h
+++ b/src/wp2/decode.h
@@ -120,6 +120,9 @@
   DecoderInfo* info = nullptr;   // if not null, report internal stats and info
 
   static const DecoderConfig kDefault;
+
+  // Do not pre-multiply the output (for lossless only).
+  bool exact = false;
 };
 
 // This function decodes the bitstream into 'output_buffer' as Argb samples.
diff --git a/src/wp2/encode.h b/src/wp2/encode.h
index df5df0c..e0c06a3 100644
--- a/src/wp2/encode.h
+++ b/src/wp2/encode.h
@@ -55,6 +55,7 @@
   int effort = 5;               // Compression rate/speed trade-off.
                                 // Range: [0=faster-bigger .. 9=slower-better]
   bool use_av1 = false;  // Use lossy AV1 internally instead of lossy WP2.
+  bool exact = false;    // Do not pre-multiply the input (for lossless only).
 
   // Side parameters:
   // Set whether the image will be rotated or mirrored during decoding.
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 869525a..0f12f3e 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -276,6 +276,7 @@
   add_wp2_test(test_vector 1)
 
   add_wp2_test(lossless/test_color_cache 1)
+  add_wp2_test(lossless/test_exact 1)
   add_wp2_test(lossless/test_group4 1)
   add_wp2_test(lossless/test_histogram 1)
   add_wp2_test(lossless/test_lzw 1)
diff --git a/tests/fuzz/fuzz_image.cc b/tests/fuzz/fuzz_image.cc
index a33aa88..03218c5 100644
--- a/tests/fuzz/fuzz_image.cc
+++ b/tests/fuzz/fuzz_image.cc
@@ -17,11 +17,14 @@
 //
 // Author: Yannis Guyon (yguyon@google.com)
 
+#include <algorithm>
+
 #include "src/utils/orientation.h"
+#include "src/wp2/base.h"
 #include "src/wp2/decode.h"
 #include "src/wp2/encode.h"
 #include "tests/fuzz/fuzz_utils.h"
 #include "tests/include/helpers.h"
 
 namespace WP2 {
 namespace testutil {
@@ -41,9 +44,11 @@
   }
 
   // Verify that it decodes fine and that the result is close enough.
-  ArgbBuffer decoded;
-  if (Decode(memory_writer.mem_, memory_writer.size_, &decoded) !=
-      WP2_STATUS_OK) {
+  ArgbBuffer decoded(config.exact ? WP2_ARGB_32 : WP2_Argb_32);
+  WP2::DecoderConfig decoder_config = WP2::DecoderConfig::kDefault;
+  decoder_config.exact = config.exact;
+  if (Decode(memory_writer.mem_, memory_writer.size_, &decoded,
+             decoder_config) != WP2_STATUS_OK) {
     abort();
   }
   if (expected_distortion > 0.f) {
diff --git a/tests/fuzz/fuzz_image_enc_config.cc b/tests/fuzz/fuzz_image_enc_config.cc
index b6e0945..3b9aec9 100644
--- a/tests/fuzz/fuzz_image_enc_config.cc
+++ b/tests/fuzz/fuzz_image_enc_config.cc
@@ -18,6 +18,7 @@
 // Author: Yannis Guyon (yguyon@google.com)
 
 #include <iostream>
+#include <ostream>
 #include <string>
 
 #include "imageio/image_dec.h"
@@ -34,7 +35,8 @@
   if (!params.ExtractSource(&file_name, &file_data, &file_size)) abort();
 
   // Read a randomly chosen valid image file.
-  WP2::ArgbBuffer original;
+  const bool exact = params.ExtractBool();
+  WP2::ArgbBuffer original(exact ? WP2_ARGB_32 : WP2_Argb_32);
   const WP2Status read_status =
       WP2::ReadImage(file_data, file_size, &original, WP2::FileFormat::AUTO,
                      WP2::LogLevel::QUIET);
@@ -47,6 +49,7 @@
   // Extract a random config.
   WP2::EncoderConfig config;
   params.ExtractConfig(&config);
+  config.exact = exact;
 
   // Make sure the encoding stops before the fuzzer timeout.
   WP2::testutil::ProgressTimeout progress(
diff --git a/tests/lossless/test_exact.cc b/tests/lossless/test_exact.cc
new file mode 100644
index 0000000..c073d90
--- /dev/null
+++ b/tests/lossless/test_exact.cc
@@ -0,0 +1,95 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// -----------------------------------------------------------------------------
+//
+// Test lossless exact compression (no pre-multiplication).
+//
+// Author: Vincent Rabaud (vrabaud@google.com)
+
+#include <cstdint>
+#include <string>
+#include <tuple>
+
+#include "imageio/image_dec.h"
+#include "src/wp2/decode.h"
+#include "src/wp2/encode.h"
+#include "tests/include/helpers.h"
+
+using WP2::ArgbBuffer;
+using WP2::DecoderConfig;
+using WP2::EncoderConfig;
+using WP2::MemoryWriter;
+
+namespace WP2L {
+namespace {
+
+// Encodes 'original' at quality 100 with EncoderConfig::exact set, decodes the
+// result with DecoderConfig::exact set into a WP2_ARGB_32 buffer and checks
+// that the decoded samples match 'original'.
+void EncodeDecodeExact(const ArgbBuffer &original) {
+  EncoderConfig encoder_config = EncoderConfig::kDefault;
+  encoder_config.quality = 100.f;
+  encoder_config.alpha_quality = 100.f;
+  encoder_config.exact = true;
+  DecoderConfig decoder_config = DecoderConfig::kDefault;
+  decoder_config.exact = true;
+
+  // Fatal assertions: there is nothing meaningful to decode or compare once a
+  // previous step failed.
+  MemoryWriter memory_writer;
+  ASSERT_WP2_OK(Encode(original, &memory_writer, encoder_config));
+
+  ArgbBuffer decompressed(WP2_ARGB_32);
+  ASSERT_WP2_OK(Decode(memory_writer.mem_, memory_writer.size_, &decompressed,
+                       decoder_config));
+
+  ASSERT_TRUE(WP2::testutil::Compare(original, decompressed, "internal"));
+}
+
+// Parameterized by the (width, height) of the image to test.
+class ExactTest
+    : public testing::TestWithParam<std::tuple<uint32_t, uint32_t>> {};
+
+// Round-trips an image whose bytes (alpha included) all come from 'rnd'.
+TEST_P(ExactTest, Random) {
+  const uint32_t width = std::get<0>(GetParam());
+  const uint32_t height = std::get<1>(GetParam());
+
+  ArgbBuffer original(WP2_ARGB_32);
+  // Fatal: the rows of 'original' are written to right below.
+  ASSERT_WP2_OK(original.Resize(width, height));
+  WP2::testutil::PrecalculatedRandom<65536u, uint8_t> rnd(0, 255);
+
+  rnd.Fill(original.GetRow8(0),
+           original.width() * WP2FormatBpp(original.format()),
+           original.height(), original.stride());
+
+  EncodeDecodeExact(original);
+}
+
+INSTANTIATE_TEST_SUITE_P(SlowLosslessTestWide, ExactTest,
+                         testing::Values(std::make_tuple(1, 1),
+                                         std::make_tuple(11, 12),
+                                         std::make_tuple(31, 47)));
+
+// Round-trips an image read from the test data folder.
+TEST(ExactTest, ReadFile) {
+  ArgbBuffer original(WP2_ARGB_32);
+  ASSERT_WP2_OK(WP2::ReadImage(
+      WP2::testutil::GetTestDataPath("alpha_ramp.png").c_str(), &original));
+  EncodeDecodeExact(original);
+}
+
+}  // namespace
+}  // namespace WP2L
diff --git a/tests/lossless/test_lzw_hash_map.cc b/tests/lossless/test_lzw_hash_map.cc
index fd99cb0..1bfbfda 100644
--- a/tests/lossless/test_lzw_hash_map.cc
+++ b/tests/lossless/test_lzw_hash_map.cc
@@ -150,7 +150,8 @@
   ArgbBuffer simple_rgb_buf;
   ASSERT_WP2_OK(BuildArgbBuffer(simple_rgb_buf));
   WP2L::Palette palette;
-  palette.Init(WP2SampleFormat::WP2_Argb_32, true);
+  palette.Init(WP2SampleFormat::WP2_Argb_32, /*has_alpha=*/true,
+               /*exact=*/false);
   ASSERT_WP2_OK(palette.AnalyzeAndCreate(simple_rgb_buf));
 
   constexpr uint32_t size = 10;
diff --git a/tests/lossless/test_palette.cc b/tests/lossless/test_palette.cc
index 2662db8..b549b24 100644
--- a/tests/lossless/test_palette.cc
+++ b/tests/lossless/test_palette.cc
@@ -14,6 +14,9 @@
 
 // Test Palette class.
 
+#include <algorithm>
+#include <tuple>
+
 #include "src/common/color_precision.h"
 #include "src/dec/lossless/losslessi_dec.h"
 #include "src/dsp/lossless/decl_dsp.h"
@@ -72,7 +75,7 @@
   // Create palette.
   WP2L::Palette palette;
   const bool has_alpha = buffer.HasTransparency();
-  palette.Init(buffer.format(), has_alpha);
+  palette.Init(buffer.format(), has_alpha, /*exact=*/false);
   ASSERT_WP2_OK(palette.AnalyzeAndCreate(buffer));
   // There could be fewer colors if we never chose some of them but it's pretty
   // unlikely so check for equality.