blob: d4c6580183095098b1b29c0b2c5581c8d275425d [file] [log] [blame]
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use crate::ffi;
use crate::models::Metadata;
use serde::{de, de::Deserializer, de::Error as DeserializerError};
use std::fmt;
use std::io::{BufReader, Read};
use zip;
/// Capacity passed to `BufReader::with_capacity` when `ZipEntryBufReader::new`
/// wraps a zip entry.
pub const STREAM_BUFFER_SIZE: usize = 4096;
/// Maps `file_type` to the data type string expected for that kind of file.
///
/// Returns `Err` with a static message for any file type that has no
/// associated data type.
fn expected_data_type(file_type: ffi::FileType) -> Result<&'static str, &'static str> {
    let data_type = match file_type {
        ffi::FileType::SafariHistory => "history",
        ffi::FileType::StablePortabilityHistory => "history_visits",
        ffi::FileType::PaymentCards => "payment_cards",
        _ => return Err("No data type for this file type"),
    };
    Ok(data_type)
}
/// Maps `file_type` to the key under which the user data array is expected.
///
/// Returns `Err` with a static message for any file type that has no
/// associated array token.
fn array_token_for_data_type(file_type: ffi::FileType) -> Result<&'static str, &'static str> {
    let array_token = match file_type {
        ffi::FileType::SafariHistory => "history",
        ffi::FileType::StablePortabilityHistory => "history_visits",
        ffi::FileType::PaymentCards => "payment_cards",
        _ => return Err("No array token for this file type"),
    };
    Ok(array_token)
}
/// Buffered wrapper around a single `zip::read::ZipFile` entry.
///
/// Keeping the entry inside a `BufReader` lets a consumer such as
/// `serde_json_lenient` read it incrementally through a fixed-size buffer
/// rather than loading the entire entry into memory.
pub struct ZipEntryBufReader<'a, R: Read> {
    /// The zip entry, wrapped in a `BufReader`.
    pub inner: BufReader<zip::read::ZipFile<'a, R>>,
}

impl<'a, R: Read> ZipEntryBufReader<'a, R> {
    /// Wraps `zip_file` in a `BufReader` created with a capacity of
    /// `STREAM_BUFFER_SIZE`.
    pub fn new(zip_file: zip::read::ZipFile<'a, R>) -> Self {
        let inner = BufReader::with_capacity(STREAM_BUFFER_SIZE, zip_file);
        Self { inner }
    }
}
/// Deserialization seed that streams the elements of a sequence into a
/// callback instead of collecting them.
///
/// The wrapped boxed closure is invoked once for every element that converts
/// into `T`.
struct ArrayDeserializerSeed<'de, T>(Box<dyn FnMut(T) + 'de>)
where
    T: de::DeserializeOwned;

impl<'de, T> de::DeserializeSeed<'de> for ArrayDeserializerSeed<'de, T>
where
    T: de::DeserializeOwned,
{
    // Elements are handed to the callback as they are read and no new data
    // structure is built, so there is nothing to return.
    type Value = ();

    /// Drives `deserializer` as a sequence, passing each element that converts
    /// into `T` to the wrapped callback.
    ///
    /// Errors reported by the sequence access itself are propagated; elements
    /// that fail to convert into `T` are skipped.
    fn deserialize<D>(self, deserializer: D) -> Result<(), D::Error>
    where
        D: de::Deserializer<'de>,
    {
        /// Visitor that owns the callback while the sequence is walked.
        struct SeqVisitor<'de, T>(Box<dyn FnMut(T) + 'de>);

        impl<'de, T> de::Visitor<'de> for SeqVisitor<'de, T>
        where
            T: de::DeserializeOwned,
        {
            type Value = ();

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("array")
            }

            fn visit_seq<S>(mut self, mut seq: S) -> Result<(), S::Error>
            where
                S: de::SeqAccess<'de>,
            {
                // Each element is first read as a generic JSON value, so an
                // element that does not convert into `T` can be dropped
                // without aborting the rest of the sequence.
                while let Some(value) = seq.next_element::<serde_json_lenient::Value>()? {
                    if let Ok(element) = serde_json_lenient::from_value::<T>(value) {
                        (self.0)(element);
                    }
                }
                Ok(())
            }
        }

        deserializer.deserialize_seq(SeqVisitor(self.0))
    }
}
/// Streams a top-level JSON object from `stream_reader`, validates its
/// `"metadata"` entry and forwards the elements of the user data array to
/// `callback`.
///
/// The object must contain a `"metadata"` entry whose `data_type` equals the
/// data type associated with `file_type`, and that entry must appear before
/// the array stored under the array token associated with `file_type`. Any
/// other key is skipped.
///
/// # Arguments
///
/// * `stream_reader` - buffered source of the JSON text.
/// * `file_type` - selects the expected data type and the array key.
/// * `callback` - receives the array elements, via `ArrayDeserializerSeed`.
/// * `metadata_only` - when `true`, parsing stops successfully as soon as the
///   metadata has been validated and `callback` is not invoked.
///
/// # Errors
///
/// Returns the deserializer error rendered as a string when the input is not
/// a valid map, when `file_type` has no associated data type or array token,
/// when the metadata is duplicated or has an unexpected data type, when the
/// array appears before the metadata, or when the end of the object is
/// reached without an early success.
pub fn deserialize_top_level<'de, T, R>(
    mut stream_reader: BufReader<R>,
    file_type: ffi::FileType,
    callback: impl FnMut(T) + 'de,
    metadata_only: bool,
) -> Result<(), String>
where
    T: de::DeserializeOwned + 'de,
    R: std::io::Read,
{
    // Sentinel error message used to stop deserialization early once all the
    // required work has been done; it is translated back into `Ok(())` below.
    const VALID_PARTIAL_DESERIALIZATION: &str = "Valid partial deserialization";

    /// Visitor that walks the top-level object.
    struct MapVisitor<'de, T>
    where
        T: de::DeserializeOwned,
    {
        file_type: ffi::FileType,
        callback: Box<dyn FnMut(T) + 'de>,
        metadata_only: bool,
    }

    impl<'de, T> de::Visitor<'de> for MapVisitor<'de, T>
    where
        T: de::DeserializeOwned + 'de,
    {
        type Value = ();

        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
            formatter.write_str("map/object")
        }

        fn visit_map<M>(self, mut map: M) -> Result<(), M::Error>
        where
            M: de::MapAccess<'de>,
        {
            const METADATA_TOKEN: &str = "metadata";

            let Ok(data_type) = expected_data_type(self.file_type) else {
                return Err(DeserializerError::custom("File type has no associated data type"));
            };
            let Ok(expected_key) = array_token_for_data_type(self.file_type) else {
                return Err(DeserializerError::custom("File type has no associated array token"));
            };

            // Set once a metadata entry with the expected data type was seen.
            let mut has_expected_data_type = false;
            while let Some(actual_key) = map.next_key::<String>()? {
                if actual_key == METADATA_TOKEN {
                    if has_expected_data_type {
                        return Err(DeserializerError::custom("Multiple metadata tokens"));
                    }
                    let metadata = map.next_value::<Metadata>()?;
                    if metadata.data_type != data_type {
                        return Err(DeserializerError::custom("Unexpected data type"));
                    }
                    has_expected_data_type = true;
                    if self.metadata_only {
                        // Only the data type check was requested and it has
                        // succeeded. Returning `Ok` here would make
                        // `deserialize_map` fail because the object was not
                        // fully consumed, so the sentinel error is returned
                        // instead and mapped to a success by the caller.
                        return Err(DeserializerError::custom(VALID_PARTIAL_DESERIALIZATION));
                    }
                } else if actual_key == expected_key {
                    if !has_expected_data_type {
                        return Err(DeserializerError::custom("Found array before metadata"));
                    }
                    // `self.callback` is already a boxed trait object, so it
                    // is handed over as-is rather than boxed a second time.
                    map.next_value_seed(ArrayDeserializerSeed(self.callback))?;
                    // The user data array has been parsed, so nothing else in
                    // the object is needed. As above, the sentinel error stops
                    // deserialization without consuming the remaining input.
                    return Err(DeserializerError::custom(VALID_PARTIAL_DESERIALIZATION));
                } else {
                    // Unknown key: consume and discard its value.
                    let de::IgnoredAny = map.next_value()?;
                }
            }
            Err(DeserializerError::custom("Array not found"))
        }
    }

    let visitor = MapVisitor { file_type, callback: Box::new(callback), metadata_only };
    let mut d = serde_json_lenient::Deserializer::from_reader(&mut stream_reader);
    match d.deserialize_map(visitor) {
        Ok(()) => Ok(()),
        Err(e) => {
            // The deserializer may append position information to the message,
            // hence the prefix match. A sentinel error means every required
            // task completed and parsing was merely cut short.
            let message = e.to_string();
            if message.starts_with(VALID_PARTIAL_DESERIALIZATION) {
                Ok(())
            } else {
                Err(message)
            }
        }
    }
}