# blob: b2875f3c82ae8d0739bf6b104ce430ac830a9680 [file] [log] [blame]
# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
from hashlib import sha256
import itertools
from boto.compat import StringIO
from tests.unit import unittest
from mock import (
call,
Mock,
sentinel,
)
from nose.tools import assert_equal
from boto.glacier.layer1 import Layer1
from boto.glacier.vault import Vault
from boto.glacier.writer import Writer, resume_file_upload
from boto.glacier.utils import bytes_to_hex, chunk_hashes, tree_hash
def create_mock_vault():
    """Build a Mock standing in for a Glacier Vault.

    The returned mock is spec'd against ``Vault`` and carries a ``layer1``
    mock spec'd against ``Layer1``.  Its ``complete_multipart_upload``
    call is primed to return a response whose ``ArchiveId`` is
    ``sentinel.archive_id``, and the vault name is ``sentinel.vault_name``.
    """
    mock_vault = Mock(spec=Vault)
    layer1 = Mock(spec=Layer1)
    mock_vault.layer1 = layer1
    layer1.complete_multipart_upload.return_value = {
        'ArchiveId': sentinel.archive_id}
    mock_vault.name = sentinel.vault_name
    return mock_vault
def partify(data, part_size):
    """Yield consecutive ``part_size``-long slices of ``data``.

    Iteration stops at the first empty slice, so the final part may be
    shorter than ``part_size`` and empty input yields nothing.
    """
    offset = 0
    while True:
        piece = data[offset:offset + part_size]
        if not piece:
            # Ran off the end of the data (or the slice is empty).
            return
        yield piece
        offset += part_size
def calculate_mock_vault_calls(data, part_size, chunk_size):
    """Work out the ``upload_part`` calls expected for ``data``.

    ``data`` is split into ``part_size`` pieces; for each piece the
    expected ``layer1.upload_part`` call is built from its SHA-256 hex
    digest, its hex tree hash (computed over ``chunk_size`` chunks) and
    its inclusive byte range.

    Returns a ``(upload_part_calls, data_tree_hashes)`` tuple, where the
    second item holds the raw (non-hex) tree hash of every part, in order.
    """
    expected_calls = []
    part_tree_hashes = []
    for index, chunk in enumerate(partify(data, part_size)):
        first_byte = index * part_size
        # The byte range passed to upload_part is inclusive at both ends.
        last_byte = first_byte + len(chunk) - 1
        raw_tree_hash = tree_hash(chunk_hashes(chunk, chunk_size))
        part_tree_hashes.append(raw_tree_hash)
        expected_calls.append(
            call.layer1.upload_part(
                sentinel.vault_name,
                sentinel.upload_id,
                sha256(chunk).hexdigest(),
                bytes_to_hex(raw_tree_hash),
                (first_byte, last_byte),
                chunk))
    return expected_calls, part_tree_hashes
def check_mock_vault_calls(vault, upload_part_calls, data_tree_hashes,
                           data_len):
    """Assert that the mock vault saw exactly the expected upload traffic.

    Checks that every call in ``upload_part_calls`` was made (in any
    order), that no extra ``upload_part`` calls happened, and that
    ``complete_multipart_upload`` was called once with the hex tree hash
    of ``data_tree_hashes`` and ``data_len``.
    """
    layer1 = vault.layer1
    layer1.upload_part.assert_has_calls(upload_part_calls, any_order=True)
    # assert_has_calls tolerates extra calls, so pin the count as well.
    assert_equal(len(upload_part_calls), layer1.upload_part.call_count)

    expected_tree_hash = bytes_to_hex(tree_hash(data_tree_hashes))
    layer1.complete_multipart_upload.assert_called_once_with(
        sentinel.vault_name, sentinel.upload_id, expected_tree_hash, data_len)
class TestWriter(unittest.TestCase):
    """Exercise ``Writer`` against a mock vault using tiny part sizes."""

    def setUp(self):
        super(TestWriter, self).setUp()
        self.vault = create_mock_vault()
        self.chunk_size = 2  # power of 2
        self.part_size = 4  # power of 2
        upload_id = sentinel.upload_id
        self.writer = Writer(
            self.vault, upload_id, self.part_size, self.chunk_size)

    def check_write(self, write_list):
        """Write each item of ``write_list``, close, and verify the calls.

        The expected calls are derived from the concatenation of all the
        writes, so how the data was split across writes must not matter.
        """
        for write_data in write_list:
            self.writer.write(write_data)
        self.writer.close()

        data = b''.join(write_list)
        upload_part_calls, data_tree_hashes = calculate_mock_vault_calls(
            data, self.part_size, self.chunk_size)
        check_mock_vault_calls(
            self.vault, upload_part_calls, data_tree_hashes, len(data))

    def test_single_byte_write(self):
        self.check_write([b'1'])

    def test_one_part_write(self):
        self.check_write([b'1234'])

    def test_split_write_1(self):
        self.check_write([b'1', b'234'])

    def test_split_write_2(self):
        self.check_write([b'12', b'34'])

    def test_split_write_3(self):
        self.check_write([b'123', b'4'])

    def test_one_part_plus_one_write(self):
        self.check_write([b'12345'])

    def test_returns_archive_id(self):
        self.writer.write(b'1')
        self.writer.close()
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(sentinel.archive_id, self.writer.get_archive_id())

    def test_current_tree_hash(self):
        self.writer.write(b'1234')
        self.writer.write(b'567')
        hash_1 = self.writer.current_tree_hash
        self.assertEqual(
            hash_1,
            b'\x0e\xb0\x11Z\x1d\x1f\n\x10|\xf76\xa6\xf5' +
            b'\x83\xd1\xd5"bU\x0c\x95\xa8<\xf5\x81\xef\x0e\x0f\x95\n\xb7k'
        )

        # This hash will be different, since the content has changed.
        self.writer.write(b'22i3uy')
        hash_2 = self.writer.current_tree_hash
        self.assertEqual(
            hash_2,
            b'\x7f\xf4\x97\x82U]\x81R\x05#^\xe8\x1c\xd19' +
            b'\xe8\x1f\x9e\xe0\x1aO\xaad\xe5\x06"\xa5\xc0\xa8AdL'
        )
        self.writer.close()

        # Check the final tree hash, post-close.
        final_hash = self.writer.current_tree_hash
        self.assertEqual(
            final_hash,
            b';\x1a\xb8!=\xf0\x14#\x83\x11\xd5\x0b\x0f' +
            b'\xc7D\xe4\x8e\xd1W\x99z\x14\x06\xb9D\xd0\xf0*\x93\xa2\x8e\xf9'
        )
        # Then assert we don't get a different one on a subsequent call.
        self.assertEqual(final_hash, self.writer.current_tree_hash)

    def test_current_uploaded_size(self):
        # 7 bytes written with a 4-byte part size: the expected value
        # shows only the one complete part counted so far.
        self.writer.write(b'1234')
        self.writer.write(b'567')
        size_1 = self.writer.current_uploaded_size
        self.assertEqual(size_1, 4)

        # The uploaded size grows now that more complete parts exist
        # (13 bytes written in total, 12 of them in whole parts).
        self.writer.write(b'22i3uy')
        size_2 = self.writer.current_uploaded_size
        self.assertEqual(size_2, 12)
        self.writer.close()

        # Get the final size, post-close.
        final_size = self.writer.current_uploaded_size
        self.assertEqual(final_size, 13)
        # Then assert we don't get a different one on a subsequent call.
        self.assertEqual(final_size, self.writer.current_uploaded_size)

    def test_upload_id(self):
        self.assertEqual(sentinel.upload_id, self.writer.upload_id)
class TestResume(unittest.TestCase):
    """Exercise ``resume_file_upload`` against a mock vault."""

    def setUp(self):
        super(TestResume, self).setUp()
        self.vault = create_mock_vault()
        self.chunk_size = 2  # power of 2
        self.part_size = 4  # power of 2

    def check_no_resume(self, data, resume_set=frozenset()):
        """Resume an upload of ``data`` and verify the resulting calls.

        ``resume_set`` holds the indexes of parts treated as already
        uploaded: their tree hashes are handed to ``resume_file_upload``
        and no ``upload_part`` call is expected for them.  The default is
        an immutable empty set so no state is shared between calls.
        """
        fobj = StringIO(data.decode('utf-8'))
        part_hash_map = {}
        for part_index in resume_set:
            start = self.part_size * part_index
            end = start + self.part_size
            part_data = data[start:end]
            part_hash_map[part_index] = tree_hash(
                chunk_hashes(part_data, self.chunk_size))

        resume_file_upload(
            self.vault, sentinel.upload_id, self.part_size, fobj,
            part_hash_map, self.chunk_size)

        upload_part_calls, data_tree_hashes = calculate_mock_vault_calls(
            data, self.part_size, self.chunk_size)
        # Parts listed in resume_set must not be uploaded again; the final
        # tree hash is still checked against every part.
        resume_upload_part_calls = [
            expected_call
            for part_index, expected_call in enumerate(upload_part_calls)
            if part_index not in resume_set]
        check_mock_vault_calls(
            self.vault, resume_upload_part_calls, data_tree_hashes, len(data))

    def test_one_part_no_resume(self):
        self.check_no_resume(b'1234')

    def test_two_parts_no_resume(self):
        self.check_no_resume(b'12345678')

    def test_one_part_resume(self):
        self.check_no_resume(b'1234', resume_set=set([0]))

    def test_two_parts_one_resume(self):
        self.check_no_resume(b'12345678', resume_set=set([1]))

    def test_returns_archive_id(self):
        archive_id = resume_file_upload(
            self.vault, sentinel.upload_id, self.part_size, StringIO('1'), {},
            self.chunk_size)
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(sentinel.archive_id, archive_id)