Merge "fdts: enable virtIO P9 device for morello fvp platform" into integration
diff --git a/Makefile b/Makefile
index f899dac..b6c8b21 100644
--- a/Makefile
+++ b/Makefile
@@ -895,6 +895,7 @@
         DYN_DISABLE_AUTH \
         EL3_EXCEPTION_HANDLING \
         ENABLE_AMU \
+        AMU_RESTRICT_COUNTERS \
         ENABLE_ASSERTIONS \
         ENABLE_MPAM_FOR_LOWER_ELS \
         ENABLE_PIE \
@@ -984,6 +985,7 @@
         DECRYPTION_SUPPORT_${DECRYPTION_SUPPORT} \
         DISABLE_MTPMU \
         ENABLE_AMU \
+        AMU_RESTRICT_COUNTERS \
         ENABLE_ASSERTIONS \
         ENABLE_BTI \
         ENABLE_MPAM_FOR_LOWER_ELS \
diff --git a/docs/getting_started/build-options.rst b/docs/getting_started/build-options.rst
index c520e0c..5935b4e 100644
--- a/docs/getting_started/build-options.rst
+++ b/docs/getting_started/build-options.rst
@@ -22,6 +22,10 @@
    directory containing the SP source, relative to the ``bl32/``; the directory
    is expected to contain a makefile called ``<aarch32_sp-value>.mk``.
 
+-  ``AMU_RESTRICT_COUNTERS``: Register reads to the group 1 counters will return
+   zero at all but the highest implemented exception level.  Reads from the
+   memory mapped view are unaffected by this control.
+
 -  ``ARCH`` : Choose the target build architecture for TF-A. It can take either
    ``aarch64`` or ``aarch32`` as values. By default, it is defined to
    ``aarch64``.
diff --git a/drivers/marvell/iob.c b/drivers/marvell/iob.c
index 87f147a..29088aa 100644
--- a/drivers/marvell/iob.c
+++ b/drivers/marvell/iob.c
@@ -44,6 +44,10 @@
 #define IOB_WIN_ALR_OFFSET(win)		(iob_base + 0x8 + (0x20 * win))
 #define IOB_WIN_AHR_OFFSET(win)		(iob_base + 0xC + (0x20 * win))
 
+#define IOB_WIN_DIOB_CR_OFFSET(win)	(iob_base + 0x10 + (0x20 * win))
+#define IOB_WIN_XOR0_DIOB_EN		BIT(0)
+#define IOB_WIN_XOR1_DIOB_EN		BIT(1)
+
 uintptr_t iob_base;
 
 static void iob_win_check(struct addr_map_win *win, uint32_t win_num)
@@ -71,6 +75,17 @@
 	uint32_t iob_win_reg;
 	uint32_t alr, ahr;
 	uint64_t end_addr;
+	uint32_t reg_en;
+
+	/* move XOR (DMA) to use WIN1 which is used for PCI-EP address space */
+	reg_en = IOB_WIN_XOR0_DIOB_EN | IOB_WIN_XOR1_DIOB_EN;
+	iob_win_reg = mmio_read_32(IOB_WIN_DIOB_CR_OFFSET(0));
+	iob_win_reg &= ~reg_en;
+	mmio_write_32(IOB_WIN_DIOB_CR_OFFSET(0), iob_win_reg);
+
+	iob_win_reg = mmio_read_32(IOB_WIN_DIOB_CR_OFFSET(1));
+	iob_win_reg |= reg_en;
+	mmio_write_32(IOB_WIN_DIOB_CR_OFFSET(1), iob_win_reg);
 
 	end_addr = (win->base_addr + win->win_size - 1);
 	alr = (uint32_t)((win->base_addr >> ADDRESS_SHIFT) & ADDRESS_MASK);
diff --git a/drivers/marvell/mochi/ap807_setup.c b/drivers/marvell/mochi/ap807_setup.c
index 1069f8c..75e9654 100644
--- a/drivers/marvell/mochi/ap807_setup.c
+++ b/drivers/marvell/mochi/ap807_setup.c
@@ -15,8 +15,9 @@
 #include <drivers/marvell/mci.h>
 #include <drivers/marvell/mochi/ap_setup.h>
 #include <lib/mmio.h>
+#include <lib/utils_def.h>
 
-#include <mvebu_def.h>
+#include <a8k_plat_def.h>
 
 #define SMMU_sACR				(MVEBU_SMMU_BASE + 0x10)
 #define SMMU_sACR_PG_64K			(1 << 16)
@@ -71,6 +72,23 @@
 #define MVEBU_AXI_ATTR_REG(index)		(MVEBU_AXI_ATTR_BASE + \
 							0x4 * index)
 
+#define XOR_STREAM_ID_REG(ch)	(MVEBU_REGS_BASE + 0x410010 + (ch) * 0x20000)
+#define XOR_STREAM_ID_MASK	0xFFFF
+#define SDIO_STREAM_ID_REG	(MVEBU_RFU_BASE + 0x4600)
+#define SDIO_STREAM_ID_MASK	0xFF
+
+/* Do not use the default Stream ID 0 */
+#define A807_STREAM_ID_BASE	(0x1)
+
+static uintptr_t stream_id_reg[] = {
+	XOR_STREAM_ID_REG(0),
+	XOR_STREAM_ID_REG(1),
+	XOR_STREAM_ID_REG(2),
+	XOR_STREAM_ID_REG(3),
+	SDIO_STREAM_ID_REG,
+	0
+};
+
 enum axi_attr {
 	AXI_SDIO_ATTR = 0,
 	AXI_DFX_ATTR,
@@ -162,6 +180,21 @@
 				  MCI_REMAP_OFF_SHIFT);
 }
 
+/* Set a unique stream id for all DMA capable devices */
+static void ap807_stream_id_init(void)
+{
+	uint32_t i;
+
+	for (i = 0;
+	     stream_id_reg[i] != 0 && i < ARRAY_SIZE(stream_id_reg); i++) {
+		uint32_t mask = stream_id_reg[i] == SDIO_STREAM_ID_REG ?
+				SDIO_STREAM_ID_MASK : XOR_STREAM_ID_MASK;
+
+		mmio_clrsetbits_32(stream_id_reg[i], mask,
+				   i + A807_STREAM_ID_BASE);
+	}
+}
+
 static void ap807_axi_attr_init(void)
 {
 	uint32_t index, data;
@@ -265,6 +298,9 @@
 	/* configure CCU windows */
 	init_ccu(MVEBU_AP0);
 
+	/* Set the stream IDs for DMA masters */
+	ap807_stream_id_init();
+
 	/* configure the SMMU */
 	setup_smmu();
 
diff --git a/drivers/marvell/mochi/apn806_setup.c b/drivers/marvell/mochi/apn806_setup.c
index 8c3ba92..5c71fed 100644
--- a/drivers/marvell/mochi/apn806_setup.c
+++ b/drivers/marvell/mochi/apn806_setup.c
@@ -15,7 +15,7 @@
 #include <drivers/marvell/mochi/ap_setup.h>
 #include <lib/mmio.h>
 
-#include <mvebu_def.h>
+#include <a8k_plat_def.h>
 
 #define SMMU_sACR				(MVEBU_SMMU_BASE + 0x10)
 #define SMMU_sACR_PG_64K			(1 << 16)
@@ -67,6 +67,23 @@
 #define MVEBU_AXI_ATTR_REG(index)		(MVEBU_AXI_ATTR_BASE + \
 							0x4 * index)
 
+#define XOR_STREAM_ID_REG(ch)	(MVEBU_REGS_BASE + 0x410010 + (ch) * 0x20000)
+#define XOR_STREAM_ID_MASK	0xFFFF
+#define SDIO_STREAM_ID_REG	(MVEBU_RFU_BASE + 0x4600)
+#define SDIO_STREAM_ID_MASK	0xFF
+
+/* Do not use the default Stream ID 0 */
+#define A806_STREAM_ID_BASE	(0x1)
+
+static uintptr_t stream_id_reg[] = {
+	XOR_STREAM_ID_REG(0),
+	XOR_STREAM_ID_REG(1),
+	XOR_STREAM_ID_REG(2),
+	XOR_STREAM_ID_REG(3),
+	SDIO_STREAM_ID_REG,
+	0
+};
+
 enum axi_attr {
 	AXI_SDIO_ATTR = 0,
 	AXI_DFX_ATTR,
@@ -158,6 +175,20 @@
 			      MCI_REMAP_OFF_SHIFT);
 }
 
+/* Set a unique stream id for all DMA capable devices */
+static void ap806_stream_id_init(void)
+{
+	int i;
+
+	for (i = 0; stream_id_reg[i] != 0; i++) {
+		uint32_t mask = stream_id_reg[i] == SDIO_STREAM_ID_REG ?
+				SDIO_STREAM_ID_MASK : XOR_STREAM_ID_MASK;
+
+		mmio_clrsetbits_32(stream_id_reg[i], mask,
+				   i + A806_STREAM_ID_BASE);
+	}
+}
+
 static void apn806_axi_attr_init(void)
 {
 	uint32_t index, data;
@@ -236,6 +267,9 @@
 	/* configure DSS */
 	dss_setup();
 
+	/* Set the stream IDs for DMA masters */
+	ap806_stream_id_init();
+
 	/* configure the SMMU */
 	setup_smmu();
 
diff --git a/drivers/marvell/mochi/cp110_setup.c b/drivers/marvell/mochi/cp110_setup.c
index 0fa0497..906df66 100644
--- a/drivers/marvell/mochi/cp110_setup.c
+++ b/drivers/marvell/mochi/cp110_setup.c
@@ -12,6 +12,7 @@
 #include <drivers/marvell/amb_adec.h>
 #include <drivers/marvell/iob.h>
 #include <drivers/marvell/mochi/cp110_setup.h>
+#include <drivers/rambus/trng_ip_76.h>
 
 #include <plat_marvell.h>
 
@@ -105,6 +106,11 @@
 #define MVEBU_RTC_READ_OUTPUT_DELAY_MASK		0xFFFF
 #define MVEBU_RTC_READ_OUTPUT_DELAY_DEFAULT		0x1F
 
+/*******************************************************************************
+ * TRNG Configuration
+ ******************************************************************************/
+#define MVEBU_TRNG_BASE					(0x760000)
+
 enum axi_attr {
 	AXI_ADUNIT_ATTR = 0,
 	AXI_COMUNIT_ATTR,
@@ -130,7 +136,7 @@
 #define USB3H_1_STREAM_ID_REG	(RFU_STREAM_ID_BASE + 0x10)
 #define SATA_0_STREAM_ID_REG	(RFU_STREAM_ID_BASE + 0x14)
 #define SATA_1_STREAM_ID_REG	(RFU_STREAM_ID_BASE + 0x18)
-#define SDIO_0_STREAM_ID_REG	(RFU_STREAM_ID_BASE + 0x28)
+#define SDIO_STREAM_ID_REG	(RFU_STREAM_ID_BASE + 0x28)
 
 #define CP_DMA_0_STREAM_ID_REG  (0x6B0010)
 #define CP_DMA_1_STREAM_ID_REG  (0x6D0010)
@@ -138,14 +144,14 @@
 /* We allocate IDs 128-255 for PCIe */
 #define MAX_STREAM_ID		(0x80)
 
-uintptr_t stream_id_reg[] = {
+static uintptr_t stream_id_reg[] = {
 	USB3H_0_STREAM_ID_REG,
 	USB3H_1_STREAM_ID_REG,
 	CP_DMA_0_STREAM_ID_REG,
 	CP_DMA_1_STREAM_ID_REG,
 	SATA_0_STREAM_ID_REG,
 	SATA_1_STREAM_ID_REG,
-	SDIO_0_STREAM_ID_REG,
+	SDIO_STREAM_ID_REG,
 	0
 };
 
@@ -180,8 +186,9 @@
 	pcie0_clk = (reg & SAR_PCIE0_CLK_CFG_MASK) >> SAR_PCIE0_CLK_CFG_OFFSET;
 	pcie1_clk = (reg & SAR_PCIE1_CLK_CFG_MASK) >> SAR_PCIE1_CLK_CFG_OFFSET;
 
-	/* CP110 revision A2 */
-	if (cp110_rev_id_get(base) == MVEBU_CP110_REF_ID_A2) {
+	/* CP110 revision A2 or CN913x */
+	if (cp110_rev_id_get(base) == MVEBU_CP110_REF_ID_A2 ||
+	    cp110_device_id_get(base) == MVEBU_CN9130_DEV_ID) {
 		/*
 		 * PCIe Reference Clock Buffer Control register must be
 		 * set according to the clock direction (input/output)
@@ -378,6 +385,20 @@
 	init_amb_adec(base);
 }
 
+static void cp110_trng_init(uintptr_t base)
+{
+	static bool done;
+	int ret;
+
+	if (!done) {
+		ret = eip76_rng_probe(base + MVEBU_TRNG_BASE);
+		if (ret != 0) {
+			ERROR("Failed to init TRNG @ 0x%lx\n", base);
+			return;
+		}
+		done = true;
+	}
+}
 void cp110_init(uintptr_t cp110_base, uint32_t stream_id)
 {
 	INFO("%s: Initialize CPx - base = %lx\n", __func__, cp110_base);
@@ -405,6 +426,9 @@
 
 	/* Reset RTC if needed */
 	cp110_rtc_init(cp110_base);
+
+	/* TRNG init - for CP0 only */
+	cp110_trng_init(cp110_base);
 }
 
 /* Do the minimal setup required to configure the CP in BLE */
diff --git a/drivers/rambus/trng_ip_76.c b/drivers/rambus/trng_ip_76.c
new file mode 100644
index 0000000..8de12e9
--- /dev/null
+++ b/drivers/rambus/trng_ip_76.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2020, Marvell Technology Group Ltd. All rights reserved.
+ *
+ * Based on Linux kernel omap-rng.c - RNG driver for TI OMAP CPU family
+ *
+ * Author: Deepak Saxena <dsaxena@plexity.net>
+ *
+ * Copyright 2005 (c) MontaVista Software, Inc.
+ *
+ * Mostly based on original driver:
+ *
+ * Copyright (C) 2005 Nokia Corporation
+ * Author: Juha Yrjölä <juha.yrjola@nokia.com>
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+
+#include <common/debug.h>
+#include <drivers/delay_timer.h>
+#include <drivers/rambus/trng_ip_76.h>
+#include <lib/mmio.h>
+#include <lib/spinlock.h>
+#include <lib/utils.h>
+
+#define RNG_REG_STATUS_RDY			(1 << 0)
+
+#define RNG_REG_INTACK_RDY_MASK			(1 << 0)
+
+#define RNG_CONTROL_ENABLE_TRNG_MASK		(1 << 10)
+
+#define RNG_CONFIG_NOISE_BLOCKS(val)		((0xff & (val)) << 0)
+#define RNG_CONFIG_NOISE_BLK_VAL		0x5
+
+#define RNG_CONFIG_SAMPLE_CYCLES(val)		((0xff & (val)) << 16)
+#define RNG_CONFIG_SAMPLE_CYCLES_VAL		0x22
+
+#define RNG_REG_FRO_ENABLE_MASK			0xffffff
+#define RNG_REG_FRO_DETUNE_MASK			0x0
+
+#define EIP76_RNG_OUTPUT_SIZE			0x10
+#define EIP76_RNG_WAIT_ROUNDS			10
+
+#define RNG_HW_IS_EIP76(ver)			((ver) & (0xff == 0x4C))
+#define RNG_HW_VER_MAJOR(ver)			(((ver) & (0xf << 24)) >> 24)
+#define RNG_HW_VER_MINOR(ver)			(((ver) & (0xf << 20)) >> 20)
+#define RNG_HW_VER_PATCH(ver)			(((ver) & (0xf << 16)) >> 16)
+
+
+enum {
+	RNG_OUTPUT_0_REG = 0,
+	RNG_OUTPUT_1_REG,
+	RNG_OUTPUT_2_REG,
+	RNG_OUTPUT_3_REG,
+	RNG_STATUS_REG,
+	RNG_INTMASK_REG,
+	RNG_INTACK_REG,
+	RNG_CONTROL_REG,
+	RNG_CONFIG_REG,
+	RNG_ALARMCNT_REG,
+	RNG_FROENABLE_REG,
+	RNG_FRODETUNE_REG,
+	RNG_ALARMMASK_REG,
+	RNG_ALARMSTOP_REG,
+	RNG_REV_REG
+};
+
+static uint16_t reg_map_eip76[] = {
+	[RNG_OUTPUT_0_REG]	= 0x0,
+	[RNG_OUTPUT_1_REG]	= 0x4,
+	[RNG_OUTPUT_2_REG]	= 0x8,
+	[RNG_OUTPUT_3_REG]	= 0xc,
+	[RNG_STATUS_REG]	= 0x10,
+	[RNG_INTACK_REG]	= 0x10,
+	[RNG_CONTROL_REG]	= 0x14,
+	[RNG_CONFIG_REG]	= 0x18,
+	[RNG_ALARMCNT_REG]	= 0x1c,
+	[RNG_FROENABLE_REG]	= 0x20,
+	[RNG_FRODETUNE_REG]	= 0x24,
+	[RNG_ALARMMASK_REG]	= 0x28,
+	[RNG_ALARMSTOP_REG]	= 0x2c,
+	[RNG_REV_REG]		= 0x7c,
+};
+
+struct eip76_rng_dev {
+	uintptr_t	base;
+	uint16_t	*regs;
+};
+
+/* Locals */
+static struct eip76_rng_dev eip76_dev;
+static spinlock_t rng_lock;
+
+static inline uint32_t eip76_rng_read(struct eip76_rng_dev *dev, uint16_t reg)
+{
+	return mmio_read_32(dev->base + dev->regs[reg]);
+}
+
+static inline void eip76_rng_write(struct eip76_rng_dev *dev,
+				   uint16_t reg, uint32_t val)
+{
+	mmio_write_32(dev->base + dev->regs[reg], val);
+}
+
+static void eip76_rng_init(struct eip76_rng_dev *dev)
+{
+	uint32_t val;
+
+	/* Return if RNG is already running. */
+	if (eip76_rng_read(dev, RNG_CONTROL_REG) &
+			   RNG_CONTROL_ENABLE_TRNG_MASK) {
+		return;
+	}
+
+	/*  This field sets the number of 512-bit blocks of raw Noise Source
+	 * output data that must be processed by either the Conditioning
+	 * Function or the SP 800-90 DRBG ‘BC_DF’ functionality to yield
+	 * a ‘full entropy’ output value. As according to [SP 800-90B draft]
+	 * the amount of entropy input to this functionality must be twice
+	 * the amount that is output and the 8-bit samples output by the Noise
+	 * Source are supposed to have one bit of entropy each, the settings
+	 * for this field are as follows:
+	 * - SHA-1 Conditioning Function:
+	 *  generates 160 bits output, requiring 2560 sample bits,
+	 *  equivalent to 5 blocks of raw Noise Source input.
+	 * - SHA-256 Conditioning Function:
+	 *  generates 256 bits output, requiring 4096 sample bits, equivalent
+	 *  to 8 blocks of raw Noise Source input. Note that two blocks of 256
+	 *  bits are needed to start or re-seed the SP 800-90 DRBG
+	 *  (in the EIP-76d-*-SHA2 configurations)
+	 * - SP 800-90 DRBG ‘BC_DF’ functionality:
+	 *  generates 384 bits output, requiring 6144 sample bits, equivalent
+	 *  to 12 blocks of raw Noise Source input.
+	 *  This field can only be modified when ‘enable_trng’ in TRNG_CONTROL
+	 *  is ‘0’ or when either of the ‘test_known_noise’ or ‘test_cond_func’
+	 *  bits in TRNG_TEST is ‘1’. Value 0 in this field selects 256 blocks
+	 *  of 512 bits to be processed.
+	 */
+	val = RNG_CONFIG_NOISE_BLOCKS(RNG_CONFIG_NOISE_BLK_VAL);
+
+	/* This field sets the number of FRO samples that are XOR-ed together
+	 * into one bit to be shifted into the main shift register.
+	 * This value must be such that there is at least one bit of entropy
+	 * (in total) in each 8 bits that are shifted.
+	 * This field can only be modified when ‘enable_trng’ in TRNG_CONTROL
+	 * is ‘0’ or when either of the ‘test_known_noise’ or ‘test_cond_func’
+	 * bits in TRNG_TEST is ‘1’. Value 0 in this field selects 65536 FRO
+	 * samples to be XOR-ed together
+	 */
+	val |= RNG_CONFIG_SAMPLE_CYCLES(RNG_CONFIG_SAMPLE_CYCLES_VAL);
+	eip76_rng_write(dev, RNG_CONFIG_REG, val);
+
+	/* Enable all available FROs */
+	eip76_rng_write(dev, RNG_FRODETUNE_REG, RNG_REG_FRO_DETUNE_MASK);
+	eip76_rng_write(dev, RNG_FROENABLE_REG, RNG_REG_FRO_ENABLE_MASK);
+
+	/* Enable TRNG */
+	eip76_rng_write(dev, RNG_CONTROL_REG, RNG_CONTROL_ENABLE_TRNG_MASK);
+}
+
+int32_t eip76_rng_read_rand_buf(void *data, bool wait)
+{
+	uint32_t i, present;
+
+	if (!eip76_dev.base) /* not initialized */
+		return -1;
+
+	for (i = 0; i < EIP76_RNG_WAIT_ROUNDS; i++) {
+		present = eip76_rng_read(&eip76_dev, RNG_STATUS_REG) &
+					 RNG_REG_STATUS_RDY;
+		if (present || !wait) {
+			break;
+		}
+
+		udelay(10);
+	}
+
+	if (present != 0U) {
+		return 0;
+	}
+
+	memcpy(data,
+	       (void *)(eip76_dev.base + eip76_dev.regs[RNG_OUTPUT_0_REG]),
+	       EIP76_RNG_OUTPUT_SIZE);
+
+	eip76_rng_write(&eip76_dev, RNG_INTACK_REG, RNG_REG_INTACK_RDY_MASK);
+
+	return EIP76_RNG_OUTPUT_SIZE;
+}
+
+int32_t eip76_rng_probe(uintptr_t base_addr)
+{
+	uint32_t ver;
+
+	eip76_dev.base = base_addr;
+	eip76_dev.regs = reg_map_eip76;
+
+	eip76_rng_init(&eip76_dev);
+
+	ver = eip76_rng_read(&eip76_dev, RNG_REV_REG);
+
+	INFO("%s Random Number Generator HW ver. %01x.%01x.%01x\n",
+	     RNG_HW_IS_EIP76(ver) ? "TRNG-IP-76" : "Unknown",
+	     RNG_HW_VER_MAJOR(ver), RNG_HW_VER_MINOR(ver),
+	     RNG_HW_VER_PATCH(ver));
+
+	return 0;
+}
+
+int32_t eip76_rng_get_random(uint8_t *data, uint32_t len)
+{
+	static uint8_t rand[EIP76_RNG_OUTPUT_SIZE];
+	static uint8_t pos;
+	uint32_t i;
+	int32_t ret = 0;
+
+	if (!data)
+		return -1;
+
+	spin_lock(&rng_lock);
+
+	for (i = 0; i < len; i++) {
+		if (pos >= EIP76_RNG_OUTPUT_SIZE) {
+			pos = 0;
+		}
+
+		if (pos != 0U) {
+			ret = eip76_rng_read_rand_buf(rand, true);
+		}
+
+		/* Only advance FIFO index if it is non zero or
+		 * the update from TRNG HW was successful
+		 */
+		if (pos || ret > 0) {
+			data[i] = rand[pos++];
+			ret = 0;
+		} else {
+			ret = -1;
+			break;
+		}
+	}
+
+	spin_unlock(&rng_lock);
+
+	return ret;
+}
diff --git a/drivers/st/fmc/stm32_fmc2_nand.c b/drivers/st/fmc/stm32_fmc2_nand.c
index a58a243..453069b 100644
--- a/drivers/st/fmc/stm32_fmc2_nand.c
+++ b/drivers/st/fmc/stm32_fmc2_nand.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, STMicroelectronics - All Rights Reserved
+ * Copyright (c) 2019-2021, STMicroelectronics - All Rights Reserved
  *
  * SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
  */
@@ -200,9 +200,6 @@
 	if ((twait < NAND_TCS_MIN) && (tset_mem < (NAND_TCS_MIN - twait))) {
 		tset_mem = NAND_TCS_MIN - twait;
 	}
-	if ((twait < NAND_TALS_MIN) && (tset_mem < (NAND_TALS_MIN - twait))) {
-		tset_mem = NAND_TALS_MIN - twait;
-	}
 	if ((twait > thiz) && ((twait - thiz) < NAND_TDS_MIN) &&
 	    (tset_mem < (NAND_TDS_MIN - (twait - thiz)))) {
 		tset_mem = NAND_TDS_MIN - (twait - thiz);
@@ -244,12 +241,6 @@
 	if ((twait < NAND_TCS_MIN) && (tset_att < (NAND_TCS_MIN - twait))) {
 		tset_att = NAND_TCS_MIN - twait;
 	}
-	if ((twait < NAND_TCLS_MIN) && (tset_att < (NAND_TCLS_MIN - twait))) {
-		tset_att = NAND_TCLS_MIN - twait;
-	}
-	if ((twait < NAND_TALS_MIN) && (tset_att < (NAND_TALS_MIN - twait))) {
-		tset_att = NAND_TALS_MIN - twait;
-	}
 	if ((thold_mem < NAND_TRHW_MIN) &&
 	    (tset_att < (NAND_TRHW_MIN - thold_mem))) {
 		tset_att = NAND_TRHW_MIN - thold_mem;
diff --git a/include/arch/aarch32/arch.h b/include/arch/aarch32/arch.h
index c30073b..54ec009 100644
--- a/include/arch/aarch32/arch.h
+++ b/include/arch/aarch32/arch.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -116,6 +116,9 @@
 #define ID_PFR0_AMU_SHIFT	U(20)
 #define ID_PFR0_AMU_LENGTH	U(4)
 #define ID_PFR0_AMU_MASK	U(0xf)
+#define ID_PFR0_AMU_NOT_SUPPORTED	U(0x0)
+#define ID_PFR0_AMU_V1		U(0x1)
+#define ID_PFR0_AMU_V1P1	U(0x2)
 
 #define ID_PFR0_DIT_SHIFT	U(24)
 #define ID_PFR0_DIT_LENGTH	U(4)
@@ -653,7 +656,7 @@
 #define PAR_ADDR_MASK	(BIT_64(40) - ULL(1)) /* 40-bits-wide page address */
 
 /*******************************************************************************
- * Definitions for system register interface to AMU for ARMv8.4 onwards
+ * Definitions for system register interface to AMU for FEAT_AMUv1
  ******************************************************************************/
 #define AMCR		p15, 0, c13, c2, 0
 #define AMCFGR		p15, 0, c13, c2, 1
@@ -712,6 +715,9 @@
 #define AMEVTYPER1E	p15, 0, c13, c15, 6
 #define AMEVTYPER1F	p15, 0, c13, c15, 7
 
+/* AMCR definitions */
+#define AMCR_CG1RZ_BIT		(ULL(1) << 17)
+
 /* AMCFGR definitions */
 #define AMCFGR_NCG_SHIFT	U(28)
 #define AMCFGR_NCG_MASK		U(0xf)
diff --git a/include/arch/aarch32/arch_helpers.h b/include/arch/aarch32/arch_helpers.h
index 82efb18..726baf5 100644
--- a/include/arch/aarch32/arch_helpers.h
+++ b/include/arch/aarch32/arch_helpers.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -303,6 +303,7 @@
 /* Coproc registers for 32bit AMU support */
 DEFINE_COPROCR_READ_FUNC(amcfgr, AMCFGR)
 DEFINE_COPROCR_READ_FUNC(amcgcr, AMCGCR)
+DEFINE_COPROCR_RW_FUNCS(amcr, AMCR)
 
 DEFINE_COPROCR_RW_FUNCS(amcntenset0, AMCNTENSET0)
 DEFINE_COPROCR_RW_FUNCS(amcntenset1, AMCNTENSET1)
diff --git a/include/arch/aarch64/arch.h b/include/arch/aarch64/arch.h
index 2cdc7b2..b86a13e 100644
--- a/include/arch/aarch64/arch.h
+++ b/include/arch/aarch64/arch.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2021, ARM Limited and Contributors. All rights reserved.
  * Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
@@ -161,6 +161,9 @@
 #define ID_AA64PFR0_EL3_SHIFT	U(12)
 #define ID_AA64PFR0_AMU_SHIFT	U(44)
 #define ID_AA64PFR0_AMU_MASK	ULL(0xf)
+#define ID_AA64PFR0_AMU_NOT_SUPPORTED	U(0x0)
+#define ID_AA64PFR0_AMU_V1	U(0x1)
+#define ID_AA64PFR0_AMU_V1P1	U(0x2)
 #define ID_AA64PFR0_ELX_MASK	ULL(0xf)
 #define ID_AA64PFR0_GIC_SHIFT	U(24)
 #define ID_AA64PFR0_GIC_WIDTH	U(4)
@@ -406,9 +409,10 @@
 #define SCR_RES1_BITS		((U(1) << 4) | (U(1) << 5))
 #define SCR_TWEDEL_SHIFT	U(30)
 #define SCR_TWEDEL_MASK		ULL(0xf)
+#define SCR_AMVOFFEN_BIT	(UL(1) << 35)
 #define SCR_TWEDEn_BIT		(UL(1) << 29)
-#define SCR_ECVEN_BIT           (UL(1) << 28)
-#define SCR_FGTEN_BIT           (UL(1) << 27)
+#define SCR_ECVEN_BIT		(UL(1) << 28)
+#define SCR_FGTEN_BIT		(UL(1) << 27)
 #define SCR_ATA_BIT		(UL(1) << 26)
 #define SCR_FIEN_BIT		(UL(1) << 21)
 #define SCR_EEL2_BIT		(UL(1) << 18)
@@ -479,6 +483,7 @@
 #define VTTBR_BADDR_SHIFT	U(0)
 
 /* HCR definitions */
+#define HCR_AMVOFFEN_BIT	(ULL(1) << 51)
 #define HCR_API_BIT		(ULL(1) << 41)
 #define HCR_APK_BIT		(ULL(1) << 40)
 #define HCR_E2H_BIT		(ULL(1) << 34)
@@ -721,13 +726,13 @@
 #define MAX_CACHE_LINE_SIZE	U(0x800) /* 2KB */
 
 /* Physical timer control register bit fields shifts and masks */
-#define CNTP_CTL_ENABLE_SHIFT   U(0)
-#define CNTP_CTL_IMASK_SHIFT    U(1)
-#define CNTP_CTL_ISTATUS_SHIFT  U(2)
+#define CNTP_CTL_ENABLE_SHIFT	U(0)
+#define CNTP_CTL_IMASK_SHIFT	U(1)
+#define CNTP_CTL_ISTATUS_SHIFT	U(2)
 
-#define CNTP_CTL_ENABLE_MASK    U(1)
-#define CNTP_CTL_IMASK_MASK     U(1)
-#define CNTP_CTL_ISTATUS_MASK   U(1)
+#define CNTP_CTL_ENABLE_MASK	U(1)
+#define CNTP_CTL_IMASK_MASK	U(1)
+#define CNTP_CTL_ISTATUS_MASK	U(1)
 
 /* Physical timer control macros */
 #define CNTP_CTL_ENABLE_BIT	(U(1) << CNTP_CTL_ENABLE_SHIFT)
@@ -913,7 +918,7 @@
 #define MPAM3_EL3		S3_6_C10_C5_0
 
 /*******************************************************************************
- * Definitions for system register interface to AMU for ARMv8.4 onwards
+ * Definitions for system register interface to AMU for FEAT_AMUv1
  ******************************************************************************/
 #define AMCR_EL0		S3_3_C13_C2_0
 #define AMCFGR_EL0		S3_3_C13_C2_1
@@ -992,6 +997,50 @@
 #define MPAMIDR_HAS_HCR_BIT		(ULL(1) << 17)
 
 /*******************************************************************************
+ * Definitions for system register interface to AMU for FEAT_AMUv1p1
+ ******************************************************************************/
+
+/* Definition for register defining which virtual offsets are implemented. */
+#define AMCG1IDR_EL0		S3_3_C13_C2_6
+#define AMCG1IDR_CTR_MASK	ULL(0xffff)
+#define AMCG1IDR_CTR_SHIFT	U(0)
+#define AMCG1IDR_VOFF_MASK	ULL(0xffff)
+#define AMCG1IDR_VOFF_SHIFT	U(16)
+
+/* New bit added to AMCR_EL0 */
+#define AMCR_CG1RZ_BIT		(ULL(0x1) << 17)
+
+/*
+ * Definitions for virtual offset registers for architected activity monitor
+ * event counters.
+ * AMEVCNTVOFF01_EL2 intentionally left undefined, as it does not exist.
+ */
+#define AMEVCNTVOFF00_EL2	S3_4_C13_C8_0
+#define AMEVCNTVOFF02_EL2	S3_4_C13_C8_2
+#define AMEVCNTVOFF03_EL2	S3_4_C13_C8_3
+
+/*
+ * Definitions for virtual offset registers for auxiliary activity monitor event
+ * counters.
+ */
+#define AMEVCNTVOFF10_EL2	S3_4_C13_C10_0
+#define AMEVCNTVOFF11_EL2	S3_4_C13_C10_1
+#define AMEVCNTVOFF12_EL2	S3_4_C13_C10_2
+#define AMEVCNTVOFF13_EL2	S3_4_C13_C10_3
+#define AMEVCNTVOFF14_EL2	S3_4_C13_C10_4
+#define AMEVCNTVOFF15_EL2	S3_4_C13_C10_5
+#define AMEVCNTVOFF16_EL2	S3_4_C13_C10_6
+#define AMEVCNTVOFF17_EL2	S3_4_C13_C10_7
+#define AMEVCNTVOFF18_EL2	S3_4_C13_C11_0
+#define AMEVCNTVOFF19_EL2	S3_4_C13_C11_1
+#define AMEVCNTVOFF1A_EL2	S3_4_C13_C11_2
+#define AMEVCNTVOFF1B_EL2	S3_4_C13_C11_3
+#define AMEVCNTVOFF1C_EL2	S3_4_C13_C11_4
+#define AMEVCNTVOFF1D_EL2	S3_4_C13_C11_5
+#define AMEVCNTVOFF1E_EL2	S3_4_C13_C11_6
+#define AMEVCNTVOFF1F_EL2	S3_4_C13_C11_7
+
+/*******************************************************************************
  * RAS system registers
  ******************************************************************************/
 #define DISR_EL1		S3_0_C12_C1_1
diff --git a/include/arch/aarch64/arch_features.h b/include/arch/aarch64/arch_features.h
index 671b3dc..47a797a 100644
--- a/include/arch/aarch64/arch_features.h
+++ b/include/arch/aarch64/arch_features.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, Arm Limited. All rights reserved.
+ * Copyright (c) 2019-2021, Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -82,6 +82,12 @@
 		ID_AA64ISAR0_RNDR_MASK);
 }
 
+static inline bool is_armv8_6_feat_amuv1p1_present(void)
+{
+	return (((read_id_aa64pfr0_el1() >> ID_AA64PFR0_AMU_SHIFT) &
+		ID_AA64PFR0_AMU_MASK) >= ID_AA64PFR0_AMU_V1P1);
+}
+
 /*
  * Return MPAM version:
  *
diff --git a/include/arch/aarch64/arch_helpers.h b/include/arch/aarch64/arch_helpers.h
index 7fafafc..8c3400a 100644
--- a/include/arch/aarch64/arch_helpers.h
+++ b/include/arch/aarch64/arch_helpers.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -485,6 +485,8 @@
 
 DEFINE_RENAME_SYSREG_READ_FUNC(amcfgr_el0, AMCFGR_EL0)
 DEFINE_RENAME_SYSREG_READ_FUNC(amcgcr_el0, AMCGCR_EL0)
+DEFINE_RENAME_SYSREG_READ_FUNC(amcg1idr_el0, AMCG1IDR_EL0)
+DEFINE_RENAME_SYSREG_RW_FUNCS(amcr_el0, AMCR_EL0)
 DEFINE_RENAME_SYSREG_RW_FUNCS(amcntenclr0_el0, AMCNTENCLR0_EL0)
 DEFINE_RENAME_SYSREG_RW_FUNCS(amcntenset0_el0, AMCNTENSET0_EL0)
 DEFINE_RENAME_SYSREG_RW_FUNCS(amcntenclr1_el0, AMCNTENCLR1_EL0)
diff --git a/include/drivers/marvell/mochi/cp110_setup.h b/include/drivers/marvell/mochi/cp110_setup.h
index 11dc4e0..4a69257 100644
--- a/include/drivers/marvell/mochi/cp110_setup.h
+++ b/include/drivers/marvell/mochi/cp110_setup.h
@@ -31,6 +31,9 @@
 #define MAX_STREAM_ID_PER_CP		(0x10)
 #define STREAM_ID_BASE			(0x40)
 
+#define MVEBU_SECUREBOOT_CTRL_REG	(MVEBU_RFU_BASE + 0x4730)
+#define MVEBU_SECUREBOOT_EN_MASK	BIT(0)
+
 static inline uint32_t cp110_device_id_get(uintptr_t base)
 {
 	/* Returns:
@@ -50,6 +53,12 @@
 		MVEBU_DEVICE_REV_OFFSET;
 }
 
+static inline uint32_t is_secure(void)
+{
+	return !!(mmio_read_32(MVEBU_SECUREBOOT_CTRL_REG) &
+			       MVEBU_SECUREBOOT_EN_MASK);
+}
+
 void cp110_init(uintptr_t cp110_base, uint32_t stream_id);
 void cp110_ble_init(uintptr_t cp110_base);
 void cp110_amb_init(uintptr_t base);
diff --git a/include/drivers/rambus/trng_ip_76.h b/include/drivers/rambus/trng_ip_76.h
new file mode 100644
index 0000000..6de8fc7
--- /dev/null
+++ b/include/drivers/rambus/trng_ip_76.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2020, Marvell Technology Group Ltd. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+
+#ifndef __TRNG_IP_76_H__
+#define __TRNG_IP_76_H__
+
+#include <stdbool.h>
+#include <stdint.h>
+
+int32_t eip76_rng_read_rand_buf(void *data, bool wait);
+int32_t eip76_rng_probe(uintptr_t base_addr);
+int32_t eip76_rng_get_random(uint8_t *data, uint32_t len);
+
+#endif /* __TRNG_IP_76_H__ */
diff --git a/include/lib/cpus/aarch64/cortex_makalu.h b/include/lib/cpus/aarch64/cortex_makalu.h
new file mode 100644
index 0000000..4e0dc86
--- /dev/null
+++ b/include/lib/cpus/aarch64/cortex_makalu.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2021, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef CORTEX_MAKALU_H
+#define CORTEX_MAKALU_H
+
+#define CORTEX_MAKALU_MIDR					U(0x410FD4D0)
+
+/*******************************************************************************
+ * CPU Extended Control register specific definitions
+ ******************************************************************************/
+#define CORTEX_MAKALU_CPUECTLR_EL1				S3_0_C15_C1_4
+
+/*******************************************************************************
+ * CPU Power Control register specific definitions
+ ******************************************************************************/
+#define CORTEX_MAKALU_CPUPWRCTLR_EL1				S3_0_C15_C2_7
+#define CORTEX_MAKALU_CPUPWRCTLR_EL1_CORE_PWRDN_BIT		U(1)
+
+#endif /* CORTEX_MAKALU_H */
diff --git a/include/lib/extensions/amu.h b/include/lib/extensions/amu.h
index dcbdd5a..3a70e4f 100644
--- a/include/lib/extensions/amu.h
+++ b/include/lib/extensions/amu.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -66,19 +66,31 @@
 
 struct amu_ctx {
 	uint64_t group0_cnts[AMU_GROUP0_NR_COUNTERS];
+#if __aarch64__
+	/* Architected event counter 1 does not have an offset register. */
+	uint64_t group0_voffsets[AMU_GROUP0_NR_COUNTERS-1];
+#endif
 
 #if AMU_GROUP1_NR_COUNTERS
 	uint64_t group1_cnts[AMU_GROUP1_NR_COUNTERS];
+#if __aarch64__
+	uint64_t group1_voffsets[AMU_GROUP1_NR_COUNTERS];
+#endif
 #endif
 };
 
-bool amu_supported(void);
+unsigned int amu_get_version(void);
 void amu_enable(bool el2_unused);
 
 /* Group 0 configuration helpers */
 uint64_t amu_group0_cnt_read(unsigned int idx);
 void amu_group0_cnt_write(unsigned int idx, uint64_t val);
 
+#if __aarch64__
+uint64_t amu_group0_voffset_read(unsigned int idx);
+void amu_group0_voffset_write(unsigned int idx, uint64_t val);
+#endif
+
 #if AMU_GROUP1_NR_COUNTERS
 bool amu_group1_supported(void);
 
@@ -86,6 +98,12 @@
 uint64_t amu_group1_cnt_read(unsigned int idx);
 void amu_group1_cnt_write(unsigned int idx, uint64_t val);
 void amu_group1_set_evtype(unsigned int idx, unsigned int val);
+
+#if __aarch64__
+uint64_t amu_group1_voffset_read(unsigned int idx);
+void amu_group1_voffset_write(unsigned int idx, uint64_t val);
+#endif
+
 #endif
 
 #endif /* AMU_H */
diff --git a/include/lib/extensions/amu_private.h b/include/lib/extensions/amu_private.h
index 30ce59d..3b4b47c 100644
--- a/include/lib/extensions/amu_private.h
+++ b/include/lib/extensions/amu_private.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -16,4 +16,12 @@
 void amu_group1_cnt_write_internal(unsigned int idx, uint64_t val);
 void amu_group1_set_evtype_internal(unsigned int idx, unsigned int val);
 
+#if __aarch64__
+uint64_t amu_group0_voffset_read_internal(unsigned int idx);
+void amu_group0_voffset_write_internal(unsigned int idx, uint64_t val);
+
+uint64_t amu_group1_voffset_read_internal(unsigned int idx);
+void amu_group1_voffset_write_internal(unsigned int idx, uint64_t val);
+#endif
+
 #endif /* AMU_PRIVATE_H */
diff --git a/include/lib/libc/arm_acle.h b/include/lib/libc/arm_acle.h
new file mode 100644
index 0000000..953933f
--- /dev/null
+++ b/include/lib/libc/arm_acle.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2021 ARM Limited
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * The definitions below are a subset of what we would normally get by using
+ * the compiler's version of arm_acle.h. We can't use that directly because
+ * we specify -nostdinc in the Makefiles.
+ *
+ * We just define the functions we need so far.
+ */
+
+#ifndef ARM_ACLE_H
+#define ARM_ACLE_H
+
+#if !defined(__aarch64__) || defined(__clang__)
+#	define __crc32w __builtin_arm_crc32w
+#else
+#	define __crc32w __builtin_aarch64_crc32w
+#endif
+
+#endif	/* ARM_ACLE_H */
diff --git a/lib/cpus/aarch32/cpu_helpers.S b/lib/cpus/aarch32/cpu_helpers.S
index 9b5d787..6ed800c 100644
--- a/lib/cpus/aarch32/cpu_helpers.S
+++ b/lib/cpus/aarch32/cpu_helpers.S
@@ -78,6 +78,10 @@
 	mov	r1, #CPU_PWR_DWN_OPS
 	add	r1, r1, r2, lsl #2
 	ldr	r1, [r0, r1]
+#if ENABLE_ASSERTIONS
+	cmp	r1, #0
+	ASM_ASSERT(ne)
+#endif
 	bx	r1
 endfunc prepare_cpu_pwr_dwn
 
@@ -146,6 +150,10 @@
 
 	/* Subtract the increment and offset to get the cpu-ops pointer */
 	sub	r0, r4, #(CPU_OPS_SIZE + CPU_MIDR)
+#if ENABLE_ASSERTIONS
+	cmp	r0, #0
+	ASM_ASSERT(ne)
+#endif
 error_exit:
 	bx	lr
 endfunc get_cpu_ops_ptr
@@ -224,7 +232,15 @@
 	 * function. If it's non-NULL, jump to the function in turn.
 	 */
 	bl	_cpu_data
+#if ENABLE_ASSERTIONS
+	cmp	r0, #0
+	ASM_ASSERT(ne)
+#endif
 	ldr	r1, [r0, #CPU_DATA_CPU_OPS_PTR]
+#if ENABLE_ASSERTIONS
+	cmp	r1, #0
+	ASM_ASSERT(ne)
+#endif
 	ldr	r0, [r1, #CPU_ERRATA_FUNC]
 	cmp	r0, #0
 	beq	1f
diff --git a/lib/cpus/aarch64/cortex_makalu.S b/lib/cpus/aarch64/cortex_makalu.S
new file mode 100644
index 0000000..98c7d6d
--- /dev/null
+++ b/lib/cpus/aarch64/cortex_makalu.S
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2021, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <common/bl_common.h>
+#include <cortex_makalu.h>
+#include <cpu_macros.S>
+#include <plat_macros.S>
+
+/* Hardware handled coherency */
+#if HW_ASSISTED_COHERENCY == 0
+#error "Cortex Makalu must be compiled with HW_ASSISTED_COHERENCY enabled"
+#endif
+
+/* 64-bit only core */
+#if CTX_INCLUDE_AARCH32_REGS == 1
+#error "Cortex Makalu supports only AArch64. Compile with CTX_INCLUDE_AARCH32_REGS=0"
+#endif
+
+func cortex_makalu_reset_func
+	/* Disable speculative loads */
+	msr	SSBS, xzr
+	isb
+	ret
+endfunc cortex_makalu_reset_func
+
+	/* ----------------------------------------------------
+	 * HW will do the cache maintenance while powering down
+	 * ----------------------------------------------------
+	 */
+func cortex_makalu_core_pwr_dwn
+	/* ---------------------------------------------------
+	 * Enable CPU power down bit in power control register
+	 * ---------------------------------------------------
+	 */
+	mrs	x0, CORTEX_MAKALU_CPUPWRCTLR_EL1
+	orr	x0, x0, #CORTEX_MAKALU_CPUPWRCTLR_EL1_CORE_PWRDN_BIT
+	msr	CORTEX_MAKALU_CPUPWRCTLR_EL1, x0
+	isb
+	ret
+endfunc cortex_makalu_core_pwr_dwn
+
+#if REPORT_ERRATA
+/*
+ * Errata printing function for Cortex Makalu. Must follow AAPCS.
+ */
+func cortex_makalu_errata_report
+	ret
+endfunc cortex_makalu_errata_report
+#endif
+
+	/* ---------------------------------------------
+	 * This function provides Cortex Makalu-specific
+	 * register information for crash reporting.
+	 * It needs to return with x6 pointing to
+	 * a list of register names in ascii and
+	 * x8 - x15 having values of registers to be
+	 * reported.
+	 * ---------------------------------------------
+	 */
+.section .rodata.cortex_makalu_regs, "aS"
+cortex_makalu_regs:  /* The ascii list of register names to be reported */
+	.asciz	"cpuectlr_el1", ""
+
+func cortex_makalu_cpu_reg_dump
+	adr	x6, cortex_makalu_regs
+	mrs	x8, CORTEX_MAKALU_CPUECTLR_EL1
+	ret
+endfunc cortex_makalu_cpu_reg_dump
+
+declare_cpu_ops cortex_makalu, CORTEX_MAKALU_MIDR, \
+	cortex_makalu_reset_func, \
+	cortex_makalu_core_pwr_dwn
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index 72d463b..e0e4298 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -217,6 +217,16 @@
 	}
 
 	/*
+	 * FEAT_AMUv1p1 virtual offset registers are only accessible from EL3
+	 * and EL2, when clear, this bit traps accesses from EL2 so we set it
+	 * to 1 when EL2 is present.
+	 */
+	if (is_armv8_6_feat_amuv1p1_present() &&
+		(el_implemented(2) != EL_IMPL_NONE)) {
+		scr_el3 |= SCR_AMVOFFEN_BIT;
+	}
+
+	/*
 	 * Initialise SCTLR_EL1 to the reset value corresponding to the target
 	 * execution state setting all fields rather than relying of the hw.
 	 * Some fields have architecturally UNKNOWN reset values and these are
diff --git a/lib/extensions/amu/aarch32/amu.c b/lib/extensions/amu/aarch32/amu.c
index 0f75f07..ed56ddd 100644
--- a/lib/extensions/amu/aarch32/amu.c
+++ b/lib/extensions/amu/aarch32/amu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -18,13 +18,17 @@
 
 static struct amu_ctx amu_ctxs[PLATFORM_CORE_COUNT];
 
-/* Check if AMUv1 for Armv8.4 or 8.6 is implemented */
-bool amu_supported(void)
+/*
+ * Get AMU version value from pfr0.
+ * Return values
+ *   ID_PFR0_AMU_V1: FEAT_AMUv1 supported (introduced in ARM v8.4)
+ *   ID_PFR0_AMU_V1P1: FEAT_AMUv1p1 supported (introduced in ARM v8.6)
+ *   ID_PFR0_AMU_NOT_SUPPORTED: not supported
+ */
+unsigned int amu_get_version(void)
 {
-	uint32_t features = read_id_pfr0() >> ID_PFR0_AMU_SHIFT;
-
-	features &= ID_PFR0_AMU_MASK;
-	return ((features == 1U) || (features == 2U));
+	return (unsigned int)(read_id_pfr0() >> ID_PFR0_AMU_SHIFT) &
+		ID_PFR0_AMU_MASK;
 }
 
 #if AMU_GROUP1_NR_COUNTERS
@@ -43,7 +47,7 @@
  */
 void amu_enable(bool el2_unused)
 {
-	if (!amu_supported()) {
+	if (amu_get_version() == ID_PFR0_AMU_NOT_SUPPORTED) {
 		return;
 	}
 
@@ -87,12 +91,31 @@
 	/* Enable group 1 counters */
 	write_amcntenset1(AMU_GROUP1_COUNTERS_MASK);
 #endif
+
+	/* Initialize FEAT_AMUv1p1 features if present. */
+	if (amu_get_version() < ID_PFR0_AMU_V1P1) {
+		return;
+	}
+
+#if AMU_RESTRICT_COUNTERS
+	/*
+	 * FEAT_AMUv1p1 adds a register field to restrict access to group 1
+	 * counters at all but the highest implemented EL.  This is controlled
+	 * with the AMU_RESTRICT_COUNTERS compile time flag, when set, system
+	 * register reads at lower ELs return zero.  Reads from the memory
+	 * mapped view are unaffected.
+	 */
+	VERBOSE("AMU group 1 counter access restricted.\n");
+	write_amcr(read_amcr() | AMCR_CG1RZ_BIT);
+#else
+	write_amcr(read_amcr() & ~AMCR_CG1RZ_BIT);
+#endif
 }
 
 /* Read the group 0 counter identified by the given `idx`. */
 uint64_t amu_group0_cnt_read(unsigned int idx)
 {
-	assert(amu_supported());
+	assert(amu_get_version() != ID_PFR0_AMU_NOT_SUPPORTED);
 	assert(idx < AMU_GROUP0_NR_COUNTERS);
 
 	return amu_group0_cnt_read_internal(idx);
@@ -101,7 +124,7 @@
 /* Write the group 0 counter identified by the given `idx` with `val` */
 void amu_group0_cnt_write(unsigned  int idx, uint64_t val)
 {
-	assert(amu_supported());
+	assert(amu_get_version() != ID_PFR0_AMU_NOT_SUPPORTED);
 	assert(idx < AMU_GROUP0_NR_COUNTERS);
 
 	amu_group0_cnt_write_internal(idx, val);
@@ -112,7 +135,7 @@
 /* Read the group 1 counter identified by the given `idx` */
 uint64_t amu_group1_cnt_read(unsigned  int idx)
 {
-	assert(amu_supported());
+	assert(amu_get_version() != ID_PFR0_AMU_NOT_SUPPORTED);
 	assert(amu_group1_supported());
 	assert(idx < AMU_GROUP1_NR_COUNTERS);
 
@@ -122,7 +145,7 @@
 /* Write the group 1 counter identified by the given `idx` with `val` */
 void amu_group1_cnt_write(unsigned  int idx, uint64_t val)
 {
-	assert(amu_supported());
+	assert(amu_get_version() != ID_PFR0_AMU_NOT_SUPPORTED);
 	assert(amu_group1_supported());
 	assert(idx < AMU_GROUP1_NR_COUNTERS);
 
@@ -136,7 +159,7 @@
  */
 void amu_group1_set_evtype(unsigned int idx, unsigned int val)
 {
-	assert(amu_supported());
+	assert(amu_get_version() != ID_PFR0_AMU_NOT_SUPPORTED);
 	assert(amu_group1_supported());
 	assert(idx < AMU_GROUP1_NR_COUNTERS);
 
@@ -150,7 +173,7 @@
 	struct amu_ctx *ctx = &amu_ctxs[plat_my_core_pos()];
 	unsigned int i;
 
-	if (!amu_supported()) {
+	if (amu_get_version() == ID_PFR0_AMU_NOT_SUPPORTED) {
 		return (void *)-1;
 	}
 
@@ -197,7 +220,7 @@
 	struct amu_ctx *ctx = &amu_ctxs[plat_my_core_pos()];
 	unsigned int i;
 
-	if (!amu_supported()) {
+	if (amu_get_version() == ID_PFR0_AMU_NOT_SUPPORTED) {
 		return (void *)-1;
 	}
 
diff --git a/lib/extensions/amu/aarch32/amu_helpers.S b/lib/extensions/amu/aarch32/amu_helpers.S
index effb8e5..d387341 100644
--- a/lib/extensions/amu/aarch32/amu_helpers.S
+++ b/lib/extensions/amu/aarch32/amu_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -75,13 +75,13 @@
 
 1:
 	stcopr16	r2, r3, AMEVCNTR00	/* index 0 */
-	bx 		lr
+	bx		lr
 	stcopr16	r2, r3, AMEVCNTR01	/* index 1 */
-	bx 		lr
+	bx		lr
 	stcopr16	r2, r3, AMEVCNTR02	/* index 2 */
-	bx 		lr
+	bx		lr
 	stcopr16	r2, r3, AMEVCNTR03	/* index 3 */
-	bx 		lr
+	bx		lr
 endfunc amu_group0_cnt_write_internal
 
 /*
@@ -169,37 +169,37 @@
 	bx	r1
 
 1:
-	stcopr16	r2, r3,	AMEVCNTR10	/* index 0 */
+	stcopr16	r2, r3, AMEVCNTR10	/* index 0 */
 	bx		lr
-	stcopr16	r2, r3,	AMEVCNTR11	/* index 1 */
+	stcopr16	r2, r3, AMEVCNTR11	/* index 1 */
 	bx		lr
-	stcopr16	r2, r3,	AMEVCNTR12	/* index 2 */
+	stcopr16	r2, r3, AMEVCNTR12	/* index 2 */
 	bx		lr
-	stcopr16	r2, r3,	AMEVCNTR13	/* index 3 */
+	stcopr16	r2, r3, AMEVCNTR13	/* index 3 */
 	bx		lr
-	stcopr16	r2, r3,	AMEVCNTR14	/* index 4 */
+	stcopr16	r2, r3, AMEVCNTR14	/* index 4 */
 	bx		lr
-	stcopr16	r2, r3,	AMEVCNTR15	/* index 5 */
+	stcopr16	r2, r3, AMEVCNTR15	/* index 5 */
 	bx		lr
-	stcopr16	r2, r3,	AMEVCNTR16	/* index 6 */
+	stcopr16	r2, r3, AMEVCNTR16	/* index 6 */
 	bx		lr
-	stcopr16	r2, r3,	AMEVCNTR17	/* index 7 */
+	stcopr16	r2, r3, AMEVCNTR17	/* index 7 */
 	bx		lr
-	stcopr16	r2, r3,	AMEVCNTR18	/* index 8 */
+	stcopr16	r2, r3, AMEVCNTR18	/* index 8 */
 	bx		lr
-	stcopr16	r2, r3,	AMEVCNTR19	/* index 9 */
+	stcopr16	r2, r3, AMEVCNTR19	/* index 9 */
 	bx		lr
-	stcopr16	r2, r3,	AMEVCNTR1A	/* index 10 */
+	stcopr16	r2, r3, AMEVCNTR1A	/* index 10 */
 	bx		lr
-	stcopr16	r2, r3,	AMEVCNTR1B	/* index 11 */
+	stcopr16	r2, r3, AMEVCNTR1B	/* index 11 */
 	bx		lr
-	stcopr16	r2, r3,	AMEVCNTR1C	/* index 12 */
+	stcopr16	r2, r3, AMEVCNTR1C	/* index 12 */
 	bx		lr
-	stcopr16	r2, r3,	AMEVCNTR1D	/* index 13 */
+	stcopr16	r2, r3, AMEVCNTR1D	/* index 13 */
 	bx		lr
-	stcopr16	r2, r3,	AMEVCNTR1E	/* index 14 */
+	stcopr16	r2, r3, AMEVCNTR1E	/* index 14 */
 	bx		lr
-	stcopr16	r2, r3,	AMEVCNTR1F	/* index 15 */
+	stcopr16	r2, r3, AMEVCNTR1F	/* index 15 */
 	bx		lr
 endfunc amu_group1_cnt_write_internal
 
@@ -234,36 +234,36 @@
 	bx	r2
 
 1:
-	stcopr	r1,	AMEVTYPER10 /* index 0 */
+	stcopr	r1, AMEVTYPER10 /* index 0 */
 	bx	lr
-	stcopr	r1,	AMEVTYPER11 /* index 1 */
+	stcopr	r1, AMEVTYPER11 /* index 1 */
 	bx	lr
-	stcopr	r1,	AMEVTYPER12 /* index 2 */
+	stcopr	r1, AMEVTYPER12 /* index 2 */
 	bx	lr
-	stcopr	r1,	AMEVTYPER13 /* index 3 */
+	stcopr	r1, AMEVTYPER13 /* index 3 */
 	bx	lr
-	stcopr	r1,	AMEVTYPER14 /* index 4 */
+	stcopr	r1, AMEVTYPER14 /* index 4 */
 	bx	lr
-	stcopr	r1,	AMEVTYPER15 /* index 5 */
+	stcopr	r1, AMEVTYPER15 /* index 5 */
 	bx	lr
-	stcopr	r1,	AMEVTYPER16 /* index 6 */
+	stcopr	r1, AMEVTYPER16 /* index 6 */
 	bx	lr
-	stcopr	r1,	AMEVTYPER17 /* index 7 */
+	stcopr	r1, AMEVTYPER17 /* index 7 */
 	bx	lr
-	stcopr	r1,	AMEVTYPER18 /* index 8 */
+	stcopr	r1, AMEVTYPER18 /* index 8 */
 	bx	lr
-	stcopr	r1,	AMEVTYPER19 /* index 9 */
+	stcopr	r1, AMEVTYPER19 /* index 9 */
 	bx	lr
-	stcopr	r1,	AMEVTYPER1A /* index 10 */
+	stcopr	r1, AMEVTYPER1A /* index 10 */
 	bx	lr
-	stcopr	r1,	AMEVTYPER1B /* index 11 */
+	stcopr	r1, AMEVTYPER1B /* index 11 */
 	bx	lr
-	stcopr	r1,	AMEVTYPER1C /* index 12 */
+	stcopr	r1, AMEVTYPER1C /* index 12 */
 	bx	lr
-	stcopr	r1,	AMEVTYPER1D /* index 13 */
+	stcopr	r1, AMEVTYPER1D /* index 13 */
 	bx	lr
-	stcopr	r1,	AMEVTYPER1E /* index 14 */
+	stcopr	r1, AMEVTYPER1E /* index 14 */
 	bx	lr
-	stcopr	r1,	AMEVTYPER1F /* index 15 */
+	stcopr	r1, AMEVTYPER1F /* index 15 */
 	bx	lr
 endfunc amu_group1_set_evtype_internal
diff --git a/lib/extensions/amu/aarch64/amu.c b/lib/extensions/amu/aarch64/amu.c
index 4997363..24c3737 100644
--- a/lib/extensions/amu/aarch64/amu.c
+++ b/lib/extensions/amu/aarch64/amu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -8,6 +8,7 @@
 #include <stdbool.h>
 
 #include <arch.h>
+#include <arch_features.h>
 #include <arch_helpers.h>
 
 #include <lib/el3_runtime/pubsub_events.h>
@@ -18,13 +19,17 @@
 
 static struct amu_ctx amu_ctxs[PLATFORM_CORE_COUNT];
 
-/* Check if AMUv1 for Armv8.4 or 8.6 is implemented */
-bool amu_supported(void)
+/*
+ * Get AMU version value from aa64pfr0.
+ * Return values
+ *   ID_AA64PFR0_AMU_V1: FEAT_AMUv1 supported (introduced in ARM v8.4)
+ *   ID_AA64PFR0_AMU_V1P1: FEAT_AMUv1p1 supported (introduced in ARM v8.6)
+ *   ID_AA64PFR0_AMU_NOT_SUPPORTED: not supported
+ */
+unsigned int amu_get_version(void)
 {
-	uint64_t features = read_id_aa64pfr0_el1() >> ID_AA64PFR0_AMU_SHIFT;
-
-	features &= ID_AA64PFR0_AMU_MASK;
-	return ((features == 1U) || (features == 2U));
+	return (unsigned int)(read_id_aa64pfr0_el1() >> ID_AA64PFR0_AMU_SHIFT) &
+		ID_AA64PFR0_AMU_MASK;
 }
 
 #if AMU_GROUP1_NR_COUNTERS
@@ -44,8 +49,9 @@
 void amu_enable(bool el2_unused)
 {
 	uint64_t v;
+	unsigned int amu_version = amu_get_version();
 
-	if (!amu_supported()) {
+	if (amu_version == ID_AA64PFR0_AMU_NOT_SUPPORTED) {
 		return;
 	}
 
@@ -96,12 +102,36 @@
 	/* Enable group 1 counters */
 	write_amcntenset1_el0(AMU_GROUP1_COUNTERS_MASK);
 #endif
+
+	/* Initialize FEAT_AMUv1p1 features if present. */
+	if (amu_version < ID_AA64PFR0_AMU_V1P1) {
+		return;
+	}
+
+	if (el2_unused) {
+		/* Make sure virtual offsets are disabled if EL2 not used. */
+		write_hcr_el2(read_hcr_el2() & ~HCR_AMVOFFEN_BIT);
+	}
+
+#if AMU_RESTRICT_COUNTERS
+	/*
+	 * FEAT_AMUv1p1 adds a register field to restrict access to group 1
+	 * counters at all but the highest implemented EL.  This is controlled
+	 * with the AMU_RESTRICT_COUNTERS compile time flag, when set, system
+	 * register reads at lower ELs return zero.  Reads from the memory
+	 * mapped view are unaffected.
+	 */
+	VERBOSE("AMU group 1 counter access restricted.\n");
+	write_amcr_el0(read_amcr_el0() | AMCR_CG1RZ_BIT);
+#else
+	write_amcr_el0(read_amcr_el0() & ~AMCR_CG1RZ_BIT);
+#endif
 }
 
 /* Read the group 0 counter identified by the given `idx`. */
 uint64_t amu_group0_cnt_read(unsigned int idx)
 {
-	assert(amu_supported());
+	assert(amu_get_version() != ID_AA64PFR0_AMU_NOT_SUPPORTED);
 	assert(idx < AMU_GROUP0_NR_COUNTERS);
 
 	return amu_group0_cnt_read_internal(idx);
@@ -110,18 +140,49 @@
 /* Write the group 0 counter identified by the given `idx` with `val` */
 void amu_group0_cnt_write(unsigned  int idx, uint64_t val)
 {
-	assert(amu_supported());
+	assert(amu_get_version() != ID_AA64PFR0_AMU_NOT_SUPPORTED);
 	assert(idx < AMU_GROUP0_NR_COUNTERS);
 
 	amu_group0_cnt_write_internal(idx, val);
 	isb();
 }
 
+/*
+ * Read the group 0 offset register for a given index. Index must be 0, 2,
+ * or 3, the register for 1 does not exist.
+ *
+ * Using this function requires FEAT_AMUv1p1 support.
+ */
+uint64_t amu_group0_voffset_read(unsigned int idx)
+{
+	assert(amu_get_version() >= ID_AA64PFR0_AMU_V1P1);
+	assert(idx < AMU_GROUP0_NR_COUNTERS);
+	assert(idx != 1U);
+
+	return amu_group0_voffset_read_internal(idx);
+}
+
+/*
+ * Write the group 0 offset register for a given index. Index must be 0, 2, or
+ * 3, the register for 1 does not exist.
+ *
+ * Using this function requires FEAT_AMUv1p1 support.
+ */
+void amu_group0_voffset_write(unsigned int idx, uint64_t val)
+{
+	assert(amu_get_version() >= ID_AA64PFR0_AMU_V1P1);
+	assert(idx < AMU_GROUP0_NR_COUNTERS);
+	assert(idx != 1U);
+
+	amu_group0_voffset_write_internal(idx, val);
+	isb();
+}
+
 #if AMU_GROUP1_NR_COUNTERS
 /* Read the group 1 counter identified by the given `idx` */
-uint64_t amu_group1_cnt_read(unsigned  int idx)
+uint64_t amu_group1_cnt_read(unsigned int idx)
 {
-	assert(amu_supported());
+	assert(amu_get_version() != ID_AA64PFR0_AMU_NOT_SUPPORTED);
 	assert(amu_group1_supported());
 	assert(idx < AMU_GROUP1_NR_COUNTERS);
 
@@ -129,9 +190,9 @@
 }
 
 /* Write the group 1 counter identified by the given `idx` with `val` */
-void amu_group1_cnt_write(unsigned  int idx, uint64_t val)
+void amu_group1_cnt_write(unsigned int idx, uint64_t val)
 {
-	assert(amu_supported());
+	assert(amu_get_version() != ID_AA64PFR0_AMU_NOT_SUPPORTED);
 	assert(amu_group1_supported());
 	assert(idx < AMU_GROUP1_NR_COUNTERS);
 
@@ -140,12 +201,45 @@
 }
 
 /*
+ * Read the group 1 offset register for a given index.
+ *
+ * Using this function requires FEAT_AMUv1p1 support.
+ */
+uint64_t amu_group1_voffset_read(unsigned int idx)
+{
+	assert(amu_get_version() >= ID_AA64PFR0_AMU_V1P1);
+	assert(amu_group1_supported());
+	assert(idx < AMU_GROUP1_NR_COUNTERS);
+	assert(((read_amcg1idr_el0() >> AMCG1IDR_VOFF_SHIFT) &
+		(1ULL << idx)) != 0ULL);
+
+	return amu_group1_voffset_read_internal(idx);
+}
+
+/*
+ * Write the group 1 offset register for a given index.
+ *
+ * Using this function requires FEAT_AMUv1p1 support.
+ */
+void amu_group1_voffset_write(unsigned int idx, uint64_t val)
+{
+	assert(amu_get_version() >= ID_AA64PFR0_AMU_V1P1);
+	assert(amu_group1_supported());
+	assert(idx < AMU_GROUP1_NR_COUNTERS);
+	assert(((read_amcg1idr_el0() >> AMCG1IDR_VOFF_SHIFT) &
+		(1ULL << idx)) != 0ULL);
+
+	amu_group1_voffset_write_internal(idx, val);
+	isb();
+}
+
+/*
  * Program the event type register for the given `idx` with
  * the event number `val`
  */
 void amu_group1_set_evtype(unsigned int idx, unsigned int val)
 {
-	assert(amu_supported());
+	assert(amu_get_version() != ID_AA64PFR0_AMU_NOT_SUPPORTED);
 	assert(amu_group1_supported());
 	assert(idx < AMU_GROUP1_NR_COUNTERS);
 
@@ -159,7 +253,7 @@
 	struct amu_ctx *ctx = &amu_ctxs[plat_my_core_pos()];
 	unsigned int i;
 
-	if (!amu_supported()) {
+	if (amu_get_version() == ID_AA64PFR0_AMU_NOT_SUPPORTED) {
 		return (void *)-1;
 	}
 
@@ -190,13 +284,37 @@
 		ctx->group0_cnts[i] = amu_group0_cnt_read(i);
 	}
 
+	/* Save group 0 virtual offsets if supported and enabled. */
+	if ((amu_get_version() >= ID_AA64PFR0_AMU_V1P1) &&
+			((read_hcr_el2() & HCR_AMVOFFEN_BIT) != 0ULL)) {
+		/* Not using a loop because count is fixed and index 1 DNE. */
+		ctx->group0_voffsets[0U] = amu_group0_voffset_read(0U);
+		ctx->group0_voffsets[1U] = amu_group0_voffset_read(2U);
+		ctx->group0_voffsets[2U] = amu_group0_voffset_read(3U);
+	}
+
 #if AMU_GROUP1_NR_COUNTERS
 	/* Save group 1 counters */
 	for (i = 0U; i < AMU_GROUP1_NR_COUNTERS; i++) {
-		if ((AMU_GROUP1_COUNTERS_MASK & (1U << i)) != 0U) {
+		if ((AMU_GROUP1_COUNTERS_MASK & (1UL << i)) != 0U) {
 			ctx->group1_cnts[i] = amu_group1_cnt_read(i);
 		}
 	}
+
+	/* Save group 1 virtual offsets if supported and enabled. */
+	if ((amu_get_version() >= ID_AA64PFR0_AMU_V1P1) &&
+			((read_hcr_el2() & HCR_AMVOFFEN_BIT) != 0ULL)) {
+		u_register_t amcg1idr = read_amcg1idr_el0() >>
+			AMCG1IDR_VOFF_SHIFT;
+		amcg1idr = amcg1idr & AMU_GROUP1_COUNTERS_MASK;
+
+		for (i = 0U; i < AMU_GROUP1_NR_COUNTERS; i++) {
+			if (((amcg1idr >> i) & 1ULL) != 0ULL) {
+				ctx->group1_voffsets[i] =
+					amu_group1_voffset_read(i);
+			}
+		}
+	}
 #endif
 	return (void *)0;
 }
@@ -206,7 +324,7 @@
 	struct amu_ctx *ctx = &amu_ctxs[plat_my_core_pos()];
 	unsigned int i;
 
-	if (!amu_supported()) {
+	if (amu_get_version() == ID_AA64PFR0_AMU_NOT_SUPPORTED) {
 		return (void *)-1;
 	}
 
@@ -227,17 +345,41 @@
 		amu_group0_cnt_write(i, ctx->group0_cnts[i]);
 	}
 
+	/* Restore group 0 virtual offsets if supported and enabled. */
+	if ((amu_get_version() >= ID_AA64PFR0_AMU_V1P1) &&
+			((read_hcr_el2() & HCR_AMVOFFEN_BIT) != 0ULL)) {
+		/* Not using a loop because count is fixed and index 1 DNE. */
+		amu_group0_voffset_write(0U, ctx->group0_voffsets[0U]);
+		amu_group0_voffset_write(2U, ctx->group0_voffsets[1U]);
+		amu_group0_voffset_write(3U, ctx->group0_voffsets[2U]);
+	}
+
 	/* Restore group 0 counter configuration */
 	write_amcntenset0_el0(AMU_GROUP0_COUNTERS_MASK);
 
 #if AMU_GROUP1_NR_COUNTERS
 	/* Restore group 1 counters */
 	for (i = 0U; i < AMU_GROUP1_NR_COUNTERS; i++) {
-		if ((AMU_GROUP1_COUNTERS_MASK & (1U << i)) != 0U) {
+		if ((AMU_GROUP1_COUNTERS_MASK & (1UL << i)) != 0U) {
 			amu_group1_cnt_write(i, ctx->group1_cnts[i]);
 		}
 	}
 
+	/* Restore group 1 virtual offsets if supported and enabled. */
+	if ((amu_get_version() >= ID_AA64PFR0_AMU_V1P1) &&
+			((read_hcr_el2() & HCR_AMVOFFEN_BIT) != 0ULL)) {
+		u_register_t amcg1idr = read_amcg1idr_el0() >>
+			AMCG1IDR_VOFF_SHIFT;
+		amcg1idr = amcg1idr & AMU_GROUP1_COUNTERS_MASK;
+
+		for (i = 0U; i < AMU_GROUP1_NR_COUNTERS; i++) {
+			if (((amcg1idr >> i) & 1ULL) != 0ULL) {
+				amu_group1_voffset_write(i,
+					ctx->group1_voffsets[i]);
+			}
+		}
+	}
+
 	/* Restore group 1 counter configuration */
 	write_amcntenset1_el0(AMU_GROUP1_COUNTERS_MASK);
 #endif
diff --git a/lib/extensions/amu/aarch64/amu_helpers.S b/lib/extensions/amu/aarch64/amu_helpers.S
index 89007a3..9989abd 100644
--- a/lib/extensions/amu/aarch64/amu_helpers.S
+++ b/lib/extensions/amu/aarch64/amu_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -14,6 +14,12 @@
 	.globl	amu_group1_cnt_write_internal
 	.globl	amu_group1_set_evtype_internal
 
+	/* FEAT_AMUv1p1 virtualisation offset register functions */
+	.globl	amu_group0_voffset_read_internal
+	.globl	amu_group0_voffset_write_internal
+	.globl	amu_group1_voffset_read_internal
+	.globl	amu_group1_voffset_write_internal
+
 /*
  * uint64_t amu_group0_cnt_read_internal(int idx);
  *
@@ -211,3 +217,169 @@
 	write	AMEVTYPER1E_EL0		/* index 14 */
 	write	AMEVTYPER1F_EL0		/* index 15 */
 endfunc amu_group1_set_evtype_internal
+
+/*
+ * Accessor functions for virtual offset registers added with FEAT_AMUv1p1
+ */
+
+/*
+ * uint64_t amu_group0_voffset_read_internal(int idx);
+ *
+ * Given `idx`, read the corresponding AMU virtual offset register
+ * and return it in `x0`.
+ */
+func amu_group0_voffset_read_internal
+	adr	x1, 1f
+#if ENABLE_ASSERTIONS
+	/*
+	 * It can be dangerous to call this function with an
+	 * out of bounds index.  Ensure `idx` is valid.
+	 */
+	tst	x0, #~3
+	ASM_ASSERT(eq)
+	/* Make sure idx != 1 since AMEVCNTVOFF01_EL2 does not exist */
+	cmp	x0, #1
+	ASM_ASSERT(ne)
+#endif
+	/*
+	 * Given `idx` calculate address of mrs/ret instruction pair
+	 * in the table below.
+	 */
+	add	x1, x1, x0, lsl #3	/* each mrs/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x1, x1, x0, lsl #2	/* + "bti j" instruction */
+#endif
+	br	x1
+
+1:	read	AMEVCNTVOFF00_EL2	/* index 0 */
+	.skip	8			/* AMEVCNTVOFF01_EL2 does not exist */
+#if ENABLE_BTI
+	.skip	4
+#endif
+	read	AMEVCNTVOFF02_EL2	/* index 2 */
+	read	AMEVCNTVOFF03_EL2	/* index 3 */
+endfunc amu_group0_voffset_read_internal
+
+/*
+ * void amu_group0_voffset_write_internal(int idx, uint64_t val);
+ *
+ * Given `idx`, write `val` to the corresponding AMU virtual offset register.
+ */
+func amu_group0_voffset_write_internal
+	adr	x2, 1f
+#if ENABLE_ASSERTIONS
+	/*
+	 * It can be dangerous to call this function with an
+	 * out of bounds index.  Ensure `idx` is valid.
+	 */
+	tst	x0, #~3
+	ASM_ASSERT(eq)
+	/* Make sure idx != 1 since AMEVCNTVOFF01_EL2 does not exist */
+	cmp	x0, #1
+	ASM_ASSERT(ne)
+#endif
+	/*
+	 * Given `idx` calculate address of mrs/ret instruction pair
+	 * in the table below.
+	 */
+	add	x2, x2, x0, lsl #3	/* each msr/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x2, x2, x0, lsl #2	/* + "bti j" instruction */
+#endif
+	br	x2
+
+1:	write	AMEVCNTVOFF00_EL2	/* index 0 */
+	.skip	8			/* AMEVCNTVOFF01_EL2 does not exist */
+#if ENABLE_BTI
+	.skip	4
+#endif
+	write	AMEVCNTVOFF02_EL2	/* index 2 */
+	write	AMEVCNTVOFF03_EL2	/* index 3 */
+endfunc amu_group0_voffset_write_internal
+
+/*
+ * uint64_t amu_group1_voffset_read_internal(int idx);
+ *
+ * Given `idx`, read the corresponding AMU virtual offset register
+ * and return it in `x0`.
+ */
+func amu_group1_voffset_read_internal
+	adr	x1, 1f
+#if ENABLE_ASSERTIONS
+	/*
+	 * It can be dangerous to call this function with an
+	 * out of bounds index.  Ensure `idx` is valid.
+	 */
+	tst	x0, #~0xF
+	ASM_ASSERT(eq)
+#endif
+	/*
+	 * Given `idx` calculate address of mrs/ret instruction pair
+	 * in the table below.
+	 */
+	add	x1, x1, x0, lsl #3	/* each mrs/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x1, x1, x0, lsl #2	/* + "bti j" instruction */
+#endif
+	br	x1
+
+1:	read	AMEVCNTVOFF10_EL2	/* index 0 */
+	read	AMEVCNTVOFF11_EL2	/* index 1 */
+	read	AMEVCNTVOFF12_EL2	/* index 2 */
+	read	AMEVCNTVOFF13_EL2	/* index 3 */
+	read	AMEVCNTVOFF14_EL2	/* index 4 */
+	read	AMEVCNTVOFF15_EL2	/* index 5 */
+	read	AMEVCNTVOFF16_EL2	/* index 6 */
+	read	AMEVCNTVOFF17_EL2	/* index 7 */
+	read	AMEVCNTVOFF18_EL2	/* index 8 */
+	read	AMEVCNTVOFF19_EL2	/* index 9 */
+	read	AMEVCNTVOFF1A_EL2	/* index 10 */
+	read	AMEVCNTVOFF1B_EL2	/* index 11 */
+	read	AMEVCNTVOFF1C_EL2	/* index 12 */
+	read	AMEVCNTVOFF1D_EL2	/* index 13 */
+	read	AMEVCNTVOFF1E_EL2	/* index 14 */
+	read	AMEVCNTVOFF1F_EL2	/* index 15 */
+endfunc amu_group1_voffset_read_internal
+
+/*
+ * void amu_group1_voffset_write_internal(int idx, uint64_t val);
+ *
+ * Given `idx`, write `val` to the corresponding AMU virtual offset register.
+ */
+func amu_group1_voffset_write_internal
+	adr	x2, 1f
+#if ENABLE_ASSERTIONS
+	/*
+	 * It can be dangerous to call this function with an
+	 * out of bounds index.  Ensure `idx` is valid.
+	 */
+	tst	x0, #~0xF
+	ASM_ASSERT(eq)
+#endif
+	/*
+	 * Given `idx` calculate address of mrs/ret instruction pair
+	 * in the table below.
+	 */
+	add	x2, x2, x0, lsl #3	/* each msr/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x2, x2, x0, lsl #2	/* + "bti j" instruction */
+#endif
+	br	x2
+
+1:	write	AMEVCNTVOFF10_EL2	/* index 0 */
+	write	AMEVCNTVOFF11_EL2	/* index 1 */
+	write	AMEVCNTVOFF12_EL2	/* index 2 */
+	write	AMEVCNTVOFF13_EL2	/* index 3 */
+	write	AMEVCNTVOFF14_EL2	/* index 4 */
+	write	AMEVCNTVOFF15_EL2	/* index 5 */
+	write	AMEVCNTVOFF16_EL2	/* index 6 */
+	write	AMEVCNTVOFF17_EL2	/* index 7 */
+	write	AMEVCNTVOFF18_EL2	/* index 8 */
+	write	AMEVCNTVOFF19_EL2	/* index 9 */
+	write	AMEVCNTVOFF1A_EL2	/* index 10 */
+	write	AMEVCNTVOFF1B_EL2	/* index 11 */
+	write	AMEVCNTVOFF1C_EL2	/* index 12 */
+	write	AMEVCNTVOFF1D_EL2	/* index 13 */
+	write	AMEVCNTVOFF1E_EL2	/* index 14 */
+	write	AMEVCNTVOFF1F_EL2	/* index 15 */
+endfunc amu_group1_voffset_write_internal
diff --git a/lib/extensions/ras/ras_common.c b/lib/extensions/ras/ras_common.c
index 36f9a95..622879e 100644
--- a/lib/extensions/ras/ras_common.c
+++ b/lib/extensions/ras/ras_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2018-2021, ARM Limited and Contributors. All rights reserved.
  * Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
@@ -139,7 +139,7 @@
 	assert(ras_interrupt_mappings.num_intrs > 0UL);
 
 	start = 0;
-	end = (int) ras_interrupt_mappings.num_intrs;
+	end = (int)ras_interrupt_mappings.num_intrs - 1;
 	while (start <= end) {
 		mid = ((end + start) / 2);
 		if (intr_raw == ras_inrs[mid].intr_number) {
diff --git a/lib/libc/memset.c b/lib/libc/memset.c
index f9dd4c5..17f798c 100644
--- a/lib/libc/memset.c
+++ b/lib/libc/memset.c
@@ -10,19 +10,20 @@
 
 void *memset(void *dst, int val, size_t count)
 {
-	char *ptr = dst;
+	uint8_t *ptr = dst;
 	uint64_t *ptr64;
 	uint64_t fill = (unsigned char)val;
 
 	/* Simplify code below by making sure we write at least one byte. */
-	if (count == 0) {
+	if (count == 0U) {
 		return dst;
 	}
 
 	/* Handle the first part, until the pointer becomes 64-bit aligned. */
-	while (((uintptr_t)ptr & 7)) {
-		*ptr++ = val;
-		if (--count == 0) {
+	while (((uintptr_t)ptr & 7U) != 0U) {
+		*ptr = (uint8_t)val;
+		ptr++;
+		if (--count == 0U) {
 			return dst;
 		}
 	}
@@ -33,15 +34,17 @@
 	fill |= fill << 32;
 
 	/* Use 64-bit writes for as long as possible. */
-	ptr64 = (void *)ptr;
-	for (; count >= 8; count -= 8) {
-		*ptr64++ = fill;
+	ptr64 = (uint64_t *)ptr;
+	for (; count >= 8U; count -= 8) {
+		*ptr64 = fill;
+		ptr64++;
 	}
 
 	/* Handle the remaining part byte-per-byte. */
-	ptr = (void *)ptr64;
-	while (count--) {
-		*ptr++ = val;
+	ptr = (uint8_t *)ptr64;
+	while (count-- > 0U)  {
+		*ptr = (uint8_t)val;
+		ptr++;
 	}
 
 	return dst;
diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk
index e94f3c3..8d0cd04 100644
--- a/make_helpers/defaults.mk
+++ b/make_helpers/defaults.mk
@@ -291,9 +291,10 @@
 # Include Memory Tagging Extension registers in cpu context. This must be set
 # to 1 if the platform wants to use this feature in the Secure world and MTE is
 # enabled at ELX.
-CTX_INCLUDE_MTE_REGS := 0
+CTX_INCLUDE_MTE_REGS		:= 0
 
 ENABLE_AMU			:= 0
+AMU_RESTRICT_COUNTERS		:= 0
 
 # By default, enable Scalable Vector Extension if implemented for Non-secure
 # lower ELs
diff --git a/plat/allwinner/common/allwinner-common.mk b/plat/allwinner/common/allwinner-common.mk
index bf48a70..da83b5e 100644
--- a/plat/allwinner/common/allwinner-common.mk
+++ b/plat/allwinner/common/allwinner-common.mk
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2017-2019, ARM Limited and Contributors. All rights reserved.
+# Copyright (c) 2017-2021, ARM Limited and Contributors. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 #
@@ -20,8 +20,6 @@
 				${AW_PLAT}/common/sunxi_common.c
 
 BL31_SOURCES		+=	drivers/allwinner/axp/common.c		\
-				drivers/allwinner/sunxi_msgbox.c	\
-				drivers/arm/css/scpi/css_scpi.c		\
 				${GICV2_SOURCES}			\
 				drivers/delay_timer/delay_timer.c	\
 				drivers/delay_timer/generic_delay_timer.c \
@@ -29,14 +27,40 @@
 				plat/common/plat_gicv2.c		\
 				plat/common/plat_psci_common.c		\
 				${AW_PLAT}/common/sunxi_bl31_setup.c	\
-				${AW_PLAT}/common/sunxi_cpu_ops.c	\
-				${AW_PLAT}/common/sunxi_native_pm.c	\
 				${AW_PLAT}/common/sunxi_pm.c		\
-				${AW_PLAT}/common/sunxi_scpi_pm.c	\
 				${AW_PLAT}/${PLAT}/sunxi_power.c	\
 				${AW_PLAT}/common/sunxi_security.c	\
 				${AW_PLAT}/common/sunxi_topology.c
 
+# By default, attempt to use SCPI to the ARISC management processor. If SCPI
+# is not enabled or SCP firmware is not loaded, fall back to a simpler native
+# implementation that does not support CPU or system suspend.
+#
+# If SCP firmware will always be present (or absent), the unused implementation
+# can be compiled out.
+SUNXI_PSCI_USE_NATIVE	?=	1
+SUNXI_PSCI_USE_SCPI	?=	1
+
+$(eval $(call assert_boolean,SUNXI_PSCI_USE_NATIVE))
+$(eval $(call assert_boolean,SUNXI_PSCI_USE_SCPI))
+$(eval $(call add_define,SUNXI_PSCI_USE_NATIVE))
+$(eval $(call add_define,SUNXI_PSCI_USE_SCPI))
+
+ifeq (${SUNXI_PSCI_USE_NATIVE}${SUNXI_PSCI_USE_SCPI},00)
+$(error "At least one of SCPI or native PSCI ops must be enabled")
+endif
+
+ifeq (${SUNXI_PSCI_USE_NATIVE},1)
+BL31_SOURCES		+=	${AW_PLAT}/common/sunxi_cpu_ops.c	\
+				${AW_PLAT}/common/sunxi_native_pm.c
+endif
+
+ifeq (${SUNXI_PSCI_USE_SCPI},1)
+BL31_SOURCES		+=	drivers/allwinner/sunxi_msgbox.c	\
+				drivers/arm/css/scpi/css_scpi.c		\
+				${AW_PLAT}/common/sunxi_scpi_pm.c
+endif
+
 # The bootloader is guaranteed to only run on CPU 0 by the boot ROM.
 COLD_BOOT_SINGLE_CPU		:=	1
 
diff --git a/plat/allwinner/common/include/sunxi_private.h b/plat/allwinner/common/include/sunxi_private.h
index 79474e5..b68d23f 100644
--- a/plat/allwinner/common/include/sunxi_private.h
+++ b/plat/allwinner/common/include/sunxi_private.h
@@ -16,8 +16,21 @@
 void sunxi_cpu_power_off_self(void);
 void sunxi_power_down(void);
 
+#if SUNXI_PSCI_USE_NATIVE
 void sunxi_set_native_psci_ops(const plat_psci_ops_t **psci_ops);
+#else
+static inline void sunxi_set_native_psci_ops(const plat_psci_ops_t **psci_ops)
+{
+}
+#endif
+#if SUNXI_PSCI_USE_SCPI
 int sunxi_set_scpi_psci_ops(const plat_psci_ops_t **psci_ops);
+#else
+static inline int sunxi_set_scpi_psci_ops(const plat_psci_ops_t **psci_ops)
+{
+	return -1;
+}
+#endif
 int sunxi_validate_ns_entrypoint(uintptr_t ns_entrypoint);
 
 int sunxi_pmic_setup(uint16_t socid, const void *fdt);
diff --git a/plat/arm/board/arm_fpga/platform.mk b/plat/arm/board/arm_fpga/platform.mk
index 3ac1c01..7bc6a40 100644
--- a/plat/arm/board/arm_fpga/platform.mk
+++ b/plat/arm/board/arm_fpga/platform.mk
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020, Arm Limited. All rights reserved.
+# Copyright (c) 2021, Arm Limited. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 #
@@ -68,7 +68,8 @@
 				lib/cpus/aarch64/cortex_a65.S		\
 				lib/cpus/aarch64/cortex_a65ae.S		\
 				lib/cpus/aarch64/cortex_klein.S		\
-				lib/cpus/aarch64/cortex_matterhorn.S
+				lib/cpus/aarch64/cortex_matterhorn.S	\
+				lib/cpus/aarch64/cortex_makalu.S
 
 # AArch64/AArch32 cores
 	FPGA_CPU_LIBS	+=	lib/cpus/aarch64/cortex_a55.S	\
diff --git a/plat/arm/board/fvp/platform.mk b/plat/arm/board/fvp/platform.mk
index 6c09d72..3bcfe91 100644
--- a/plat/arm/board/fvp/platform.mk
+++ b/plat/arm/board/fvp/platform.mk
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2013-2021, ARM Limited and Contributors. All rights reserved.
+# Copyright (c) 2013-2021, Arm Limited and Contributors. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 #
@@ -133,6 +133,7 @@
 					lib/cpus/aarch64/cortex_a78_ae.S	\
 					lib/cpus/aarch64/cortex_klein.S	        \
 					lib/cpus/aarch64/cortex_matterhorn.S	\
+					lib/cpus/aarch64/cortex_makalu.S	\
 					lib/cpus/aarch64/cortex_a65.S		\
 					lib/cpus/aarch64/cortex_a65ae.S
 	endif
diff --git a/plat/arm/board/juno/juno_decl.h b/plat/arm/board/juno/juno_decl.h
deleted file mode 100644
index 21e56c0..0000000
--- a/plat/arm/board/juno/juno_decl.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#ifndef JUNO_DECL_H
-#define JUNO_DECL_H
-
-bool juno_getentropy(uint64_t *buf);
-
-#endif /* JUNO_DECL_H */
diff --git a/plat/arm/board/juno/juno_stack_protector.c b/plat/arm/board/juno/juno_stack_protector.c
index 8c51f57..3924af8 100644
--- a/plat/arm/board/juno/juno_stack_protector.c
+++ b/plat/arm/board/juno/juno_stack_protector.c
@@ -7,15 +7,14 @@
 #include <arch_helpers.h>
 #include <common/debug.h>
 #include <lib/utils.h>
+#include <plat/common/plat_trng.h>
 #include <platform_def.h>
 
-#include "juno_decl.h"
-
 u_register_t plat_get_stack_protector_canary(void)
 {
 	uint64_t entropy;
 
-	if (!juno_getentropy(&entropy)) {
+	if (!plat_get_entropy(&entropy)) {
 		ERROR("Not enough entropy to initialize canary value\n");
 		panic();
 	}
diff --git a/plat/arm/board/juno/juno_trng.c b/plat/arm/board/juno/juno_trng.c
index b38e49f..09552a6 100644
--- a/plat/arm/board/juno/juno_trng.c
+++ b/plat/arm/board/juno/juno_trng.c
@@ -4,6 +4,7 @@
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
+#include <arm_acle.h>
 #include <assert.h>
 #include <stdbool.h>
 #include <stdint.h>
@@ -13,7 +14,11 @@
 #include <lib/utils_def.h>
 #include <platform_def.h>
 
-#include "juno_decl.h"
+#include <lib/smccc.h>
+#include <services/trng_svc.h>
+#include <smccc_helpers.h>
+
+#include <plat/common/platform.h>
 
 #define NSAMPLE_CLOCKS	1 /* min 1 cycle, max 231 cycles */
 #define NRETRIES	5
@@ -35,18 +40,24 @@
 	return false; /* No output data available. */
 }
 
+DEFINE_SVC_UUID2(_plat_trng_uuid,
+	0x23523c58, 0x7448, 0x4083, 0x9d, 0x16,
+	0xe3, 0xfa, 0xb9, 0xf1, 0x73, 0xbc
+);
+uuid_t plat_trng_uuid;
+
+static uint32_t crc_value = ~0U;
+
 /*
- * This function fills `buf` with 8 bytes of entropy.
- * It uses the Trusted Entropy Source peripheral on Juno.
- * Returns 'true' when the buffer has been filled with entropy
- * successfully, or 'false' otherwise.
+ * Uses the Trusted Entropy Source peripheral on Juno to return 8 bytes of
+ * entropy. Returns 'true' when done successfully, 'false' otherwise.
  */
-bool juno_getentropy(uint64_t *buf)
+bool plat_get_entropy(uint64_t *out)
 {
 	uint64_t ret;
 
-	assert(buf);
-	assert(!check_uptr_overflow((uintptr_t)buf, sizeof(*buf)));
+	assert(out);
+	assert(!check_uptr_overflow((uintptr_t)out, sizeof(*out)));
 
 	if (!juno_trng_initialized) {
 		/* Disable interrupt mode. */
@@ -69,14 +80,14 @@
 			return false;
 	}
 
-	/* XOR each two 32-bit registers together, combine the pairs */
-	ret = mmio_read_32(TRNG_BASE + 0);
-	ret ^= mmio_read_32(TRNG_BASE + 4);
-	ret <<= 32;
+	/* CRC each two 32-bit registers together, combine the pairs */
+	crc_value = __crc32w(crc_value, mmio_read_32(TRNG_BASE + 0));
+	crc_value = __crc32w(crc_value, mmio_read_32(TRNG_BASE + 4));
+	ret = (uint64_t)crc_value << 32;
 
-	ret |= mmio_read_32(TRNG_BASE + 8);
-	ret ^= mmio_read_32(TRNG_BASE + 12);
-	*buf = ret;
+	crc_value = __crc32w(crc_value, mmio_read_32(TRNG_BASE + 8));
+	crc_value = __crc32w(crc_value, mmio_read_32(TRNG_BASE + 12));
+	*out = ret | crc_value;
 
 	/* Acknowledge current cycle, clear output registers. */
 	mmio_write_32(TRNG_BASE + TRNG_STATUS, 1);
@@ -85,3 +96,13 @@
 
 	return true;
 }
+
+void plat_entropy_setup(void)
+{
+	uint64_t dummy;
+
+	plat_trng_uuid = _plat_trng_uuid;
+
+	/* Initialise the entropy source and trigger RNG generation */
+	plat_get_entropy(&dummy);
+}
diff --git a/plat/arm/board/juno/platform.mk b/plat/arm/board/juno/platform.mk
index 61cfb61..5cf5749 100644
--- a/plat/arm/board/juno/platform.mk
+++ b/plat/arm/board/juno/platform.mk
@@ -44,6 +44,8 @@
 $(eval $(call add_define,JUNO_TZMP1))
 endif
 
+TRNG_SUPPORT		:=	1
+
 ifeq (${JUNO_AARCH32_EL3_RUNTIME}, 1)
 # Include BL32 in FIP
 NEED_BL32		:= yes
@@ -164,6 +166,12 @@
     endif
 endif
 
+BL1_CPPFLAGS += -march=armv8-a+crc
+BL2_CPPFLAGS += -march=armv8-a+crc
+BL2U_CPPFLAGS += -march=armv8-a+crc
+BL31_CPPFLAGS += -march=armv8-a+crc
+BL32_CPPFLAGS += -march=armv8-a+crc
+
 # Add the FDT_SOURCES and options for Dynamic Config
 FDT_SOURCES		+=	plat/arm/board/juno/fdts/${PLAT}_fw_config.dts	\
 				plat/arm/board/juno/fdts/${PLAT}_tb_fw_config.dts
diff --git a/plat/marvell/armada/a8k/a80x0/board/dram_port.c b/plat/marvell/armada/a8k/a80x0/board/dram_port.c
index 381c871..47bc0a8 100644
--- a/plat/marvell/armada/a8k/a80x0/board/dram_port.c
+++ b/plat/marvell/armada/a8k/a80x0/board/dram_port.c
@@ -138,7 +138,7 @@
 		i2c_init((void *)MVEBU_CP0_I2C_BASE);
 
 		/* select SPD memory page 0 to access DRAM configuration */
-		i2c_write(I2C_SPD_P0_ADDR, 0x0, 1, tm->spd_data.all_bytes, 1);
+		i2c_write(I2C_SPD_P0_ADDR, 0x0, 1, tm->spd_data.all_bytes, 0);
 
 		/* read data from spd */
 		i2c_read(I2C_SPD_ADDR, 0x0, 1, tm->spd_data.all_bytes,
diff --git a/plat/marvell/armada/a8k/a80x0_mcbin/board/dram_port.c b/plat/marvell/armada/a8k/a80x0_mcbin/board/dram_port.c
index 50a68b3..85c931c 100644
--- a/plat/marvell/armada/a8k/a80x0_mcbin/board/dram_port.c
+++ b/plat/marvell/armada/a8k/a80x0_mcbin/board/dram_port.c
@@ -123,7 +123,7 @@
 		/* initialize the i2c */
 		i2c_init((void *)MVEBU_CP0_I2C_BASE);
 		/* select SPD memory page 0 to access DRAM configuration */
-		i2c_write(I2C_SPD_P0_ADDR, 0x0, 1, tm->spd_data.all_bytes, 1);
+		i2c_write(I2C_SPD_P0_ADDR, 0x0, 1, tm->spd_data.all_bytes, 0);
 		/* read data from spd */
 		i2c_read(I2C_SPD_ADDR, 0x0, 1, tm->spd_data.all_bytes,
 			 sizeof(tm->spd_data.all_bytes));
diff --git a/plat/marvell/armada/a8k/a80x0_puzzle/board/dram_port.c b/plat/marvell/armada/a8k/a80x0_puzzle/board/dram_port.c
index 3879c98..1d8e9d2 100644
--- a/plat/marvell/armada/a8k/a80x0_puzzle/board/dram_port.c
+++ b/plat/marvell/armada/a8k/a80x0_puzzle/board/dram_port.c
@@ -132,7 +132,7 @@
 		/* initialize the MVEBU_AP_I2C_BASE I2C bus */
 		i2c_init((void *)MVEBU_AP_I2C_BASE);
 		/* select SPD memory page 0 to access DRAM configuration */
-		i2c_write(I2C_SPD_P0_ADDR, 0x0, 1, tm->spd_data.all_bytes, 1);
+		i2c_write(I2C_SPD_P0_ADDR, 0x0, 1, tm->spd_data.all_bytes, 0);
 		/* read data from spd */
 		i2c_read(I2C_SPD_ADDR, 0x0, 1, tm->spd_data.all_bytes,
 			 sizeof(tm->spd_data.all_bytes));
diff --git a/plat/marvell/armada/a8k/common/a8k_common.mk b/plat/marvell/armada/a8k/common/a8k_common.mk
index 63cfce2..8a463ea 100644
--- a/plat/marvell/armada/a8k/common/a8k_common.mk
+++ b/plat/marvell/armada/a8k/common/a8k_common.mk
@@ -114,7 +114,8 @@
 				$(MARVELL_DRV_BASE)/cache_llc.c	\
 				$(MARVELL_DRV_BASE)/comphy/phy-comphy-cp110.c \
 				$(MARVELL_DRV_BASE)/mc_trustzone/mc_trustzone.c \
-				$(MARVELL_DRV_BASE)/mg_conf_cm3/mg_conf_cm3.c
+				$(MARVELL_DRV_BASE)/mg_conf_cm3/mg_conf_cm3.c \
+				drivers/rambus/trng_ip_76.c
 
 BL31_PORTING_SOURCES	:=	$(BOARD_DIR)/board/marvell_plat_config.c
 
diff --git a/plat/marvell/armada/a8k/common/ble/ble.mk b/plat/marvell/armada/a8k/common/ble/ble.mk
index 78c62a0..d6d72c1 100644
--- a/plat/marvell/armada/a8k/common/ble/ble.mk
+++ b/plat/marvell/armada/a8k/common/ble/ble.mk
@@ -13,6 +13,7 @@
 BLE_SOURCES		+= 	$(BLE_PATH)/ble_main.c				\
 				$(BLE_PATH)/ble_mem.S				\
 				drivers/delay_timer/delay_timer.c		\
+				drivers/marvell/iob.c				\
 				$(PLAT_MARVELL)/common/aarch64/marvell_helpers.S \
 				$(PLAT_MARVELL)/common/plat_delay_timer.c	\
 				$(PLAT_MARVELL)/common/marvell_console.c
diff --git a/plat/marvell/armada/a8k/common/mss/mss_bl2_setup.c b/plat/marvell/armada/a8k/common/mss/mss_bl2_setup.c
index b919cb3..71fa2b8 100644
--- a/plat/marvell/armada/a8k/common/mss/mss_bl2_setup.c
+++ b/plat/marvell/armada/a8k/common/mss/mss_bl2_setup.c
@@ -30,6 +30,10 @@
 #define MSS_EXTERNAL_ADDR_MASK		0xfffffff
 #define MSS_INTERNAL_ACCESS_BIT		28
 
+#define MSS_AP_REGS_OFFSET		0x580000
+#define MSS_CP_SRAM_OFFSET		0x220000
+#define MSS_CP_REGS_OFFSET		0x280000
+
 struct addr_map_win ccu_mem_map[] = {
 	{MVEBU_CP_REGS_BASE(0), 0x4000000, IO_0_TID}
 };
@@ -121,12 +125,21 @@
 
 uintptr_t bl2_plat_get_cp_mss_regs(int ap_idx, int cp_idx)
 {
-	return MVEBU_CP_REGS_BASE(cp_idx) + 0x280000;
+	return MVEBU_CP_REGS_BASE(cp_idx) + MSS_CP_REGS_OFFSET;
+}
+
+uintptr_t bl2_plat_get_cp_mss_sram(int ap_idx, int cp_idx)
+{
+	if (is_secure()) {
+		return MVEBU_CP_REGS_BASE(cp_idx) + MSS_CP_SRAM_OFFSET;
+	}
+
+	return 0; /* SRAM will not be used */
 }
 
 uintptr_t bl2_plat_get_ap_mss_regs(int ap_idx)
 {
-	return MVEBU_REGS_BASE + 0x580000;
+	return MVEBU_REGS_BASE + MSS_AP_REGS_OFFSET;
 }
 
 uint32_t bl2_plat_get_cp_count(int ap_idx)
diff --git a/plat/marvell/armada/a8k/common/plat_ble_setup.c b/plat/marvell/armada/a8k/common/plat_ble_setup.c
index e4e09fb..4114327 100644
--- a/plat/marvell/armada/a8k/common/plat_ble_setup.c
+++ b/plat/marvell/armada/a8k/common/plat_ble_setup.c
@@ -720,7 +720,7 @@
 
 int ble_plat_setup(int *skip)
 {
-	int ret;
+	int ret, cp;
 	unsigned int freq_mode;
 
 	/* Power down unused CPUs */
@@ -745,6 +745,10 @@
 	/* Do required CP-110 setups for BLE stage */
 	cp110_ble_init(MVEBU_CP_REGS_BASE(0));
 
+	/* Config address for each cp other than cp0 */
+	for (cp = 1; cp < CP_COUNT; cp++)
+		update_cp110_default_win(cp);
+
 	/* Setup AVS */
 	ble_plat_svc_config();
 
diff --git a/plat/marvell/armada/common/mrvl_sip_svc.c b/plat/marvell/armada/common/mrvl_sip_svc.c
index 0291024..64187fb 100644
--- a/plat/marvell/armada/common/mrvl_sip_svc.c
+++ b/plat/marvell/armada/common/mrvl_sip_svc.c
@@ -9,6 +9,7 @@
 #include <common/runtime_svc.h>
 #include <drivers/marvell/cache_llc.h>
 #include <drivers/marvell/mochi/ap_setup.h>
+#include <drivers/rambus/trng_ip_76.h>
 #include <lib/smccc.h>
 
 #include <marvell_plat_priv.h>
@@ -37,6 +38,9 @@
 #define MV_SIP_PMU_IRQ_ENABLE	0x82000012
 #define MV_SIP_PMU_IRQ_DISABLE	0x82000013
 
+/* TRNG */
+#define MV_SIP_RNG_64		0xC200FF11
+
 #define MAX_LANE_NR		6
 #define MVEBU_COMPHY_OFFSET	0x441000
 #define MVEBU_CP_BASE_MASK	(~0xffffff)
@@ -68,6 +72,7 @@
 			       u_register_t flags)
 {
 	u_register_t ret;
+	uint32_t w2[2] = {0, 0};
 	int i;
 
 	debug("%s: got SMC (0x%x) x1 0x%lx, x2 0x%lx, x3 0x%lx\n",
@@ -131,7 +136,9 @@
 		mvebu_pmu_interrupt_disable();
 		SMC_RET1(handle, 0);
 #endif
-
+	case MV_SIP_RNG_64:
+		ret = eip76_rng_get_random((uint8_t *)&w2, 4 * (x1 % 2 + 1));
+		SMC_RET3(handle, ret, w2[0], w2[1]);
 	default:
 		ERROR("%s: unhandled SMC (0x%x)\n", __func__, smc_fid);
 		SMC_RET1(handle, SMC_UNK);
diff --git a/plat/marvell/armada/common/mss/mss_scp_bl2_format.h b/plat/marvell/armada/common/mss/mss_scp_bl2_format.h
index 74dddc6..90913b0 100644
--- a/plat/marvell/armada/common/mss/mss_scp_bl2_format.h
+++ b/plat/marvell/armada/common/mss/mss_scp_bl2_format.h
@@ -13,6 +13,7 @@
 #define HEADER_VERSION	0x1
 
 #define MSS_IDRAM_SIZE	0x10000 /* 64KB */
+#define MSS_SRAM_SIZE	0x8000 /* 32KB */
 
 /* Types definitions */
 typedef struct file_header {
diff --git a/plat/marvell/armada/common/mss/mss_scp_bootloader.c b/plat/marvell/armada/common/mss/mss_scp_bootloader.c
index adf570e..f669a77 100644
--- a/plat/marvell/armada/common/mss/mss_scp_bootloader.c
+++ b/plat/marvell/armada/common/mss/mss_scp_bootloader.c
@@ -38,6 +38,8 @@
 #define MSS_DMA_TIMEOUT			1000
 #define MSS_EXTERNAL_SPACE		0x50000000
 #define MSS_EXTERNAL_ADDR_MASK		0xfffffff
+#define MSS_INTERNAL_SPACE		0x40000000
+#define MSS_INTERNAL_ADDR_MASK		0x00ffffff
 
 #define DMA_SIZE			128
 
@@ -60,60 +62,113 @@
 	return 0;
 }
 
-static int mss_image_load(uint32_t src_addr, uint32_t size, uintptr_t mss_regs)
+static int mss_iram_dma_load(uint32_t src_addr, uint32_t size,
+			     uintptr_t mss_regs)
 {
 	uint32_t i, loop_num, timeout;
 
+	/* load image to MSS RAM using DMA */
+	loop_num = (size / DMA_SIZE) + !!(size % DMA_SIZE);
+	for (i = 0; i < loop_num; i++) {
+		/* write source address */
+		mmio_write_32(MSS_DMA_SRCBR(mss_regs),
+			      src_addr + (i * DMA_SIZE));
+		/* write destination address */
+		mmio_write_32(MSS_DMA_DSTBR(mss_regs), (i * DMA_SIZE));
+		/* make sure DMA data is ready before triggering it */
+		dsb();
+		/* set the DMA control register */
+		mmio_write_32(MSS_DMA_CTRLR(mss_regs),
+			      ((MSS_DMA_CTRLR_REQ_SET <<
+				MSS_DMA_CTRLR_REQ_OFFSET) |
+			      (DMA_SIZE << MSS_DMA_CTRLR_SIZE_OFFSET)));
+		/* Poll DMA_ACK at MSS_DMACTLR until it is ready */
+		timeout = MSS_DMA_TIMEOUT;
+		while (timeout > 0U) {
+			if ((mmio_read_32(MSS_DMA_CTRLR(mss_regs)) >>
+					  (MSS_DMA_CTRLR_ACK_OFFSET &
+					   MSS_DMA_CTRLR_ACK_MASK))
+					  == MSS_DMA_CTRLR_ACK_READY) {
+				break;
+			}
+			udelay(50);
+			timeout--;
+		}
+		if (timeout == 0) {
+			ERROR("\nMSS DMA failed (timeout)\n");
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static int mss_image_load(uint32_t src_addr, uint32_t size,
+			  uintptr_t mss_regs, uintptr_t sram)
+{
+	uint32_t chunks = 1; /* !sram case */
+	uint32_t chunk_num;
+	int ret;
+
 	/* Check if the img size is not bigger than ID-RAM size of MSS CM3 */
 	if (size > MSS_IDRAM_SIZE) {
 		ERROR("image is too big to fit into MSS CM3 memory\n");
 		return 1;
 	}
 
-	NOTICE("Loading MSS image from addr. 0x%x Size 0x%x to MSS at 0x%lx\n",
-	       src_addr, size, mss_regs);
-	/* load image to MSS RAM using DMA */
-	loop_num = (size / DMA_SIZE) + (((size & (DMA_SIZE - 1)) == 0) ? 0 : 1);
+	/* The CPx MSS DMA cannot access DRAM directly in secure boot mode
+	 * Copy the MSS FW image to MSS SRAM by the CPU first, then run
+	 * MSS DMA for SRAM to IRAM copy
+	 */
+	if (sram != 0) {
+		chunks = size / MSS_SRAM_SIZE + !!(size % MSS_SRAM_SIZE);
+	}
 
-	for (i = 0; i < loop_num; i++) {
-		/* write destination and source addresses */
-		mmio_write_32(MSS_DMA_SRCBR(mss_regs),
-			      MSS_EXTERNAL_SPACE |
-			      ((src_addr & MSS_EXTERNAL_ADDR_MASK) +
-			      (i * DMA_SIZE)));
-		mmio_write_32(MSS_DMA_DSTBR(mss_regs), (i * DMA_SIZE));
+	NOTICE("%s Loading MSS FW from addr. 0x%x Size 0x%x to MSS at 0x%lx\n",
+	       sram == 0 ? "" : "SECURELY", src_addr, size, mss_regs);
+	for (chunk_num = 0; chunk_num < chunks; chunk_num++) {
+		size_t chunk_size = size;
+		uint32_t img_src = MSS_EXTERNAL_SPACE | /* no SRAM */
+				   (src_addr & MSS_EXTERNAL_ADDR_MASK);
 
-		dsb(); /* make sure DMA data is ready before triggering it */
+		if (sram != 0) {
+			uintptr_t chunk_source =
+				  src_addr + MSS_SRAM_SIZE * chunk_num;
 
-		/* set the DMA control register */
-		mmio_write_32(MSS_DMA_CTRLR(mss_regs), ((MSS_DMA_CTRLR_REQ_SET
-			      << MSS_DMA_CTRLR_REQ_OFFSET) |
-			      (DMA_SIZE << MSS_DMA_CTRLR_SIZE_OFFSET)));
+			if (chunk_num != (size / MSS_SRAM_SIZE)) {
+				chunk_size = MSS_SRAM_SIZE;
+			} else {
+				chunk_size =  size % MSS_SRAM_SIZE;
+			}
 
-		/* Poll DMA_ACK at MSS_DMACTLR until it is ready */
-		timeout = MSS_DMA_TIMEOUT;
-		while (timeout) {
-			if ((mmio_read_32(MSS_DMA_CTRLR(mss_regs)) >>
-			     MSS_DMA_CTRLR_ACK_OFFSET & MSS_DMA_CTRLR_ACK_MASK)
-				== MSS_DMA_CTRLR_ACK_READY) {
+			if (chunk_size == 0) {
 				break;
 			}
 
-			udelay(50);
-			timeout--;
+			VERBOSE("Chunk %d -> SRAM 0x%lx from 0x%lx SZ 0x%lx\n",
+				chunk_num, sram, chunk_source, chunk_size);
+			memcpy((void *)sram, (void *)chunk_source, chunk_size);
+			dsb();
+			img_src = MSS_INTERNAL_SPACE |
+				  (sram & MSS_INTERNAL_ADDR_MASK);
 		}
 
-		if (timeout == 0) {
-			ERROR("\nDMA failed to load MSS image\n");
-			return 1;
+		ret = mss_iram_dma_load(img_src, chunk_size, mss_regs);
+		if (ret != 0) {
+			ERROR("MSS FW chunk %d load failed\n", chunk_num);
+			return ret;
 		}
 	}
 
 	bl2_plat_configure_mss_windows(mss_regs);
 
+	/* Wipe the MSS SRAM after using it as copy buffer */
+	if (sram) {
+		memset((void *)sram, 0, MSS_SRAM_SIZE);
+	}
+
 	/* Release M3 from reset */
-	mmio_write_32(MSS_M3_RSTCR(mss_regs), (MSS_M3_RSTCR_RST_OFF <<
-		      MSS_M3_RSTCR_RST_OFFSET));
+	mmio_write_32(MSS_M3_RSTCR(mss_regs),
+		     (MSS_M3_RSTCR_RST_OFF << MSS_M3_RSTCR_RST_OFFSET));
 
 	NOTICE("Done\n");
 
@@ -162,7 +217,7 @@
 	VERBOSE("Send info about the SCP_BL2 image to be transferred to SCP\n");
 
 	ret = mss_image_load(single_img, image_size,
-			     bl2_plat_get_ap_mss_regs(ap_idx));
+			     bl2_plat_get_ap_mss_regs(ap_idx), 0);
 	if (ret != 0) {
 		ERROR("SCP Image load failed\n");
 		return -1;
@@ -218,6 +273,8 @@
 			       cp_index, ap_idx);
 			ret = mss_image_load(single_img, image_size,
 					     bl2_plat_get_cp_mss_regs(
+						     ap_idx, cp_index),
+					     bl2_plat_get_cp_mss_sram(
 						     ap_idx, cp_index));
 			if (ret != 0) {
 				ERROR("SCP Image load failed\n");
diff --git a/plat/marvell/armada/common/mss/mss_scp_bootloader.h b/plat/marvell/armada/common/mss/mss_scp_bootloader.h
index 4950d24..d65354a 100644
--- a/plat/marvell/armada/common/mss/mss_scp_bootloader.h
+++ b/plat/marvell/armada/common/mss/mss_scp_bootloader.h
@@ -10,6 +10,7 @@
 
 int scp_bootloader_transfer(void *image, unsigned int image_size);
 uintptr_t bl2_plat_get_cp_mss_regs(int ap_idx, int cp_idx);
+uintptr_t bl2_plat_get_cp_mss_sram(int ap_idx, int cp_idx);
 uintptr_t bl2_plat_get_ap_mss_regs(int ap_idx);
 uint32_t bl2_plat_get_cp_count(int ap_idx);
 uint32_t bl2_plat_get_ap_count(void);
diff --git a/plat/marvell/octeontx/otx2/t91/t9130/board/dram_port.c b/plat/marvell/octeontx/otx2/t91/t9130/board/dram_port.c
index 0befadf..82ce07b 100644
--- a/plat/marvell/octeontx/otx2/t91/t9130/board/dram_port.c
+++ b/plat/marvell/octeontx/otx2/t91/t9130/board/dram_port.c
@@ -149,7 +149,7 @@
 		i2c_init((void *)MVEBU_CP0_I2C_BASE);
 
 		/* select SPD memory page 0 to access DRAM configuration */
-		i2c_write(I2C_SPD_P0_ADDR, 0x0, 1, tm->spd_data.all_bytes, 1);
+		i2c_write(I2C_SPD_P0_ADDR, 0x0, 1, tm->spd_data.all_bytes, 0);
 
 		/* read data from spd */
 		i2c_read(I2C_SPD_ADDR, 0x0, 1, tm->spd_data.all_bytes,
diff --git a/plat/marvell/octeontx/otx2/t91/t9130/board/marvell_plat_config.c b/plat/marvell/octeontx/otx2/t91/t9130/board/marvell_plat_config.c
index 7debd65..fbacf54 100644
--- a/plat/marvell/octeontx/otx2/t91/t9130/board/marvell_plat_config.c
+++ b/plat/marvell/octeontx/otx2/t91/t9130/board/marvell_plat_config.c
@@ -46,15 +46,19 @@
  *****************************************************************************
  */
 struct addr_map_win io_win_memory_map[] = {
+#if (CP_COUNT > 1)
+	/* SB (MCi0) internal regs */
+	{0x00000000f4000000,		0x2000000,	MCI_0_TID},
+#if (CP_COUNT > 2)
+	/* SB (MCi1) internal regs */
+	{0x00000000f6000000,		0x2000000,	MCI_1_TID},
+#endif
+#endif
 #ifndef IMAGE_BLE
 	/* SB (MCi0) PCIe0-2 on CP1 */
 	{0x00000000e2000000,		0x3000000,	MCI_0_TID},
 	/* SB (MCi1) PCIe0-2 on CP2 */
 	{0x00000000e5000000,		0x3000000,	MCI_1_TID},
-	/* SB (MCi0) internal regs */
-	{0x00000000f4000000,		0x2000000,	MCI_0_TID},
-	/* SB (MCi1) internal regs */
-	{0x00000000f6000000,		0x2000000,	MCI_1_TID},
 	/* MCI 0 indirect window */
 	{MVEBU_MCI_REG_BASE_REMAP(0),	0x100000,	MCI_0_TID},
 	/* MCI 1 indirect window */
diff --git a/plat/qemu/common/qemu_bl31_setup.c b/plat/qemu/common/qemu_bl31_setup.c
index 4d36b03..4f60eb1 100644
--- a/plat/qemu/common/qemu_bl31_setup.c
+++ b/plat/qemu/common/qemu_bl31_setup.c
@@ -7,6 +7,7 @@
 #include <assert.h>
 
 #include <common/bl_common.h>
+#include <drivers/arm/pl061_gpio.h>
 #include <plat/common/platform.h>
 
 #include "qemu_private.h"
@@ -69,9 +70,18 @@
 			      BL_COHERENT_RAM_BASE, BL_COHERENT_RAM_END);
 }
 
+static void qemu_gpio_init(void)
+{
+#ifdef SECURE_GPIO_BASE
+	pl061_gpio_init();
+	pl061_gpio_register(SECURE_GPIO_BASE, 0);
+#endif
+}
+
 void bl31_platform_setup(void)
 {
 	plat_qemu_gic_init();
+	qemu_gpio_init();
 }
 
 unsigned int plat_get_syscnt_freq2(void)
diff --git a/plat/qemu/common/qemu_pm.c b/plat/qemu/common/qemu_pm.c
index cf80009..c4ffcf9 100644
--- a/plat/qemu/common/qemu_pm.c
+++ b/plat/qemu/common/qemu_pm.c
@@ -12,6 +12,7 @@
 #include <lib/psci/psci.h>
 #include <lib/semihosting.h>
 #include <plat/common/platform.h>
+#include <drivers/gpio.h>
 
 #include "qemu_private.h"
 
@@ -201,16 +202,31 @@
 /*******************************************************************************
  * Platform handlers to shutdown/reboot the system
  ******************************************************************************/
+
 static void __dead2 qemu_system_off(void)
 {
+#ifdef SECURE_GPIO_BASE
+	ERROR("QEMU System Power off: with GPIO.\n");
+	gpio_set_direction(SECURE_GPIO_POWEROFF, GPIO_DIR_OUT);
+	gpio_set_value(SECURE_GPIO_POWEROFF, GPIO_LEVEL_HIGH);
+	gpio_set_value(SECURE_GPIO_POWEROFF, GPIO_LEVEL_LOW);
+#else
 	semihosting_exit(ADP_STOPPED_APPLICATION_EXIT, 0);
 	ERROR("QEMU System Off: semihosting call unexpectedly returned.\n");
+#endif
 	panic();
 }
 
 static void __dead2 qemu_system_reset(void)
 {
+	ERROR("QEMU System Reset: with GPIO.\n");
+#ifdef SECURE_GPIO_BASE
+	gpio_set_direction(SECURE_GPIO_RESET, GPIO_DIR_OUT);
+	gpio_set_value(SECURE_GPIO_RESET, GPIO_LEVEL_HIGH);
+	gpio_set_value(SECURE_GPIO_RESET, GPIO_LEVEL_LOW);
+#else
 	ERROR("QEMU System Reset: operation not handled.\n");
+#endif
 	panic();
 }
 
diff --git a/plat/qemu/qemu/include/platform_def.h b/plat/qemu/qemu/include/platform_def.h
index e6bb1e6..fbcaa63 100644
--- a/plat/qemu/qemu/include/platform_def.h
+++ b/plat/qemu/qemu/include/platform_def.h
@@ -89,6 +89,11 @@
 #define SEC_DRAM_BASE			0x0e100000
 #define SEC_DRAM_SIZE			0x00f00000
 
+#define SECURE_GPIO_BASE		0x090b0000
+#define SECURE_GPIO_SIZE		0x00001000
+#define SECURE_GPIO_POWEROFF		0
+#define SECURE_GPIO_RESET		1
+
 /* Load pageable part of OP-TEE 2MB above secure DRAM base */
 #define QEMU_OPTEE_PAGEABLE_LOAD_BASE	(SEC_DRAM_BASE + 0x00200000)
 #define QEMU_OPTEE_PAGEABLE_LOAD_SIZE	0x00400000
@@ -210,7 +215,7 @@
 #define DEVICE0_BASE			0x08000000
 #define DEVICE0_SIZE			0x01000000
 #define DEVICE1_BASE			0x09000000
-#define DEVICE1_SIZE			0x00041000
+#define DEVICE1_SIZE			0x00c00000
 
 /*
  * GIC related constants
diff --git a/plat/qemu/qemu/platform.mk b/plat/qemu/qemu/platform.mk
index 14bf049..88a95c8 100644
--- a/plat/qemu/qemu/platform.mk
+++ b/plat/qemu/qemu/platform.mk
@@ -163,6 +163,8 @@
 				lib/semihosting/semihosting.c		\
 				lib/semihosting/${ARCH}/semihosting_call.S \
 				plat/common/plat_psci_common.c		\
+				drivers/arm/pl061/pl061_gpio.c		\
+				drivers/gpio/gpio.c			\
 				${PLAT_QEMU_COMMON_PATH}/qemu_pm.c			\
 				${PLAT_QEMU_COMMON_PATH}/topology.c			\
 				${PLAT_QEMU_COMMON_PATH}/aarch64/plat_helpers.S	\
diff --git a/plat/xilinx/zynqmp/aarch64/zynqmp_helpers.S b/plat/xilinx/zynqmp/aarch64/zynqmp_helpers.S
index 7eab337..d8439f7 100644
--- a/plat/xilinx/zynqmp/aarch64/zynqmp_helpers.S
+++ b/plat/xilinx/zynqmp/aarch64/zynqmp_helpers.S
@@ -12,9 +12,6 @@
 	.globl	plat_is_my_cpu_primary
 	.globl	zynqmp_calc_core_pos
 	.globl	plat_my_core_pos
-	.globl	plat_crash_console_init
-	.globl	plat_crash_console_putc
-	.globl	plat_crash_console_flush
 	.globl	platform_mem_init
 
 	/* -----------------------------------------------------
@@ -79,45 +76,6 @@
 	ret
 endfunc zynqmp_calc_core_pos
 
-	/* ---------------------------------------------
-	 * int plat_crash_console_init(void)
-	 * Function to initialize the crash console
-	 * without a C Runtime to print crash report.
-	 * Clobber list : x0 - x4
-	 * ---------------------------------------------
-	 */
-func plat_crash_console_init
-	mov_imm	x0, ZYNQMP_CRASH_UART_BASE
-	mov_imm	x1, ZYNQMP_CRASH_UART_CLK_IN_HZ
-	mov_imm	x2, ZYNQMP_UART_BAUDRATE
-	b	console_cdns_core_init
-endfunc plat_crash_console_init
-
-	/* ---------------------------------------------
-	 * int plat_crash_console_putc(int c)
-	 * Function to print a character on the crash
-	 * console without a C Runtime.
-	 * Clobber list : x1, x2
-	 * ---------------------------------------------
-	 */
-func plat_crash_console_putc
-	mov_imm	x1, ZYNQMP_CRASH_UART_BASE
-	b	console_cdns_core_putc
-endfunc plat_crash_console_putc
-
-	/* ---------------------------------------------
-	 * void plat_crash_console_flush()
-	 * Function to force a write of all buffered
-	 * data that hasn't been output.
-	 * Out : void.
-	 * Clobber list : r0
-	 * ---------------------------------------------
-	 */
-func plat_crash_console_flush
-	mov_imm	x0, ZYNQMP_CRASH_UART_BASE
-	b	console_cdns_core_flush
-endfunc plat_crash_console_flush
-
 	/* ---------------------------------------------------------------------
 	 * We don't need to carry out any memory initialization on ARM
 	 * platforms. The Secure RAM is accessible straight away.
diff --git a/plat/xilinx/zynqmp/platform.mk b/plat/xilinx/zynqmp/platform.mk
index 1cd168f..6e700b9 100644
--- a/plat/xilinx/zynqmp/platform.mk
+++ b/plat/xilinx/zynqmp/platform.mk
@@ -73,7 +73,8 @@
 				plat/arm/common/arm_gicv2.c			\
 				plat/common/plat_gicv2.c			\
 				plat/xilinx/common/ipi.c			\
-				plat/xilinx/zynqmp/zynqmp_ipi.c		\
+				plat/xilinx/zynqmp/zynqmp_ipi.c			\
+				plat/common/aarch64/crash_console_helpers.S	\
 				plat/xilinx/zynqmp/aarch64/zynqmp_helpers.S	\
 				plat/xilinx/zynqmp/aarch64/zynqmp_common.c