Merge remote-tracking branch 'upstream/chromeos-3.14' into chromeos-3.14__release/core40-12-12

Change-Id: I3bf29a88d37464838773df4e6839c8e043e5d4a3
diff --git a/COMMIT-QUEUE.ini b/COMMIT-QUEUE.ini
index 930dd8c..5760464 100644
--- a/COMMIT-QUEUE.ini
+++ b/COMMIT-QUEUE.ini
@@ -11,4 +11,3 @@
                 arm-generic-no-vmtest-pre-cq
                 whirlwind-no-vmtest-pre-cq
                 veyron_pinky-no-vmtest-pre-cq
-subsystem: all
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 63fba24c..083c1ec 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2137,12 +2137,6 @@
 			parameter, xsave area per process might occupy more
 			memory on xsaves enabled systems.
 
-	eagerfpu=	[X86]
-			on	enable eager fpu restore
-			off	disable eager fpu restore
-			auto	selects the default scheme, which automatically
-				enables eagerfpu restore for xsaveopt.
-
 	nohlt		[BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
 			wfi(ARM) instruction doesn't work correctly and not to
 			use it. This is also useful when using JTAG debugger.
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 28640c3..31e1e0c 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -59,16 +59,11 @@
 asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
 				unsigned int crc_init);
 static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
-#if defined(X86_FEATURE_EAGER_FPU)
 #define set_pcl_breakeven_point()					\
 do {									\
 	if (!use_eager_fpu())						\
 		crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU;	\
 } while (0)
-#else
-#define set_pcl_breakeven_point()					\
-	(crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU)
-#endif
 #endif /* CONFIG_X86_64 */
 
 static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
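With use_eager_fpu() now hard-wired to true (see the fpu-internal.h hunk below), the retained set_pcl_breakeven_point() branch never switches away from the eager breakeven value. For orientation, crc32c_pcl_breakeven is the buffer-length threshold above which the PCLMULQDQ-assisted crc_pcl() routine is worth an FPU save/restore; the update path in this glue code consults it roughly as in the sketch below (a paraphrase of the existing crc32c_pcl_intel_update() logic in this file, not part of this diff):

	if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
		kernel_fpu_begin();
		*crcp = crc_pcl(data, len, *crcp);	/* SSE4.2 + PCLMULQDQ fast path */
		kernel_fpu_end();
	} else {
		/* fall back to the plain CRC32 instruction */
		*crcp = crc32c_intel_le_hw(*crcp, data, len);
	}
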
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 2747acd..cdb68fbe 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -104,7 +104,7 @@
 #define X86_FEATURE_EXTD_APICID	(3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM     (3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	(3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU	(3*32+29) /* "eagerfpu" Non lazy FPU restore */
+/* free, was #define X86_FEATURE_EAGER_FPU	( 3*32+29) * "eagerfpu" Non lazy FPU restore */
 #define X86_FEATURE_NONSTOP_TSC_S3 (3*32+30) /* TSC doesn't stop in S3 state */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
@@ -360,7 +360,6 @@
 #define cpu_has_perfctr_l2	boot_cpu_has(X86_FEATURE_PERFCTR_L2)
 #define cpu_has_cx8		boot_cpu_has(X86_FEATURE_CX8)
 #define cpu_has_cx16		boot_cpu_has(X86_FEATURE_CX16)
-#define cpu_has_eager_fpu	boot_cpu_has(X86_FEATURE_EAGER_FPU)
 #define cpu_has_topoext		boot_cpu_has(X86_FEATURE_TOPOEXT)
 
 #if __GNUC__ >= 4
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index cea1c76..3c401a3 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -87,7 +87,7 @@
 
 static __always_inline __pure bool use_eager_fpu(void)
 {
-	return static_cpu_has(X86_FEATURE_EAGER_FPU);
+	return true;
 }
 
 static __always_inline __pure bool use_xsaveopt(void)
@@ -344,8 +344,6 @@
 
 static inline void __thread_fpu_begin(struct task_struct *tsk)
 {
-	if (!static_cpu_has_safe(X86_FEATURE_EAGER_FPU))
-		clts();
 	__thread_set_has_fpu(tsk);
 }
 
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 0384aad..f63d916 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -508,19 +508,6 @@
 	xsave_state(init_xstate_buf, -1);
 }
 
-static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
-static int __init eager_fpu_setup(char *s)
-{
-	if (!strcmp(s, "on"))
-		eagerfpu = ENABLE;
-	else if (!strcmp(s, "off"))
-		eagerfpu = DISABLE;
-	else if (!strcmp(s, "auto"))
-		eagerfpu = AUTO;
-	return 1;
-}
-__setup("eagerfpu=", eager_fpu_setup);
-
 /*
  * Enable and initialize the xsave feature.
  */
@@ -559,20 +546,6 @@
 	prepare_fx_sw_frame();
 	setup_init_fpu_buf();
 
-	/* Auto enable eagerfpu for xsaveopt */
-	if (cpu_has_xsaveopt && eagerfpu != DISABLE)
-		eagerfpu = ENABLE;
-
-	if (pcntxt_mask & XSTATE_EAGER) {
-		if (eagerfpu == DISABLE) {
-			pr_err("eagerfpu not present, disabling some xstate features: 0x%llx\n",
-					pcntxt_mask & XSTATE_EAGER);
-			pcntxt_mask &= ~XSTATE_EAGER;
-		} else {
-			eagerfpu = ENABLE;
-		}
-	}
-
 	pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
 		pcntxt_mask, xstate_size);
 }
@@ -612,14 +585,6 @@
 	clear_used_math();
 	current_thread_info()->status = 0;
 
-	if (eagerfpu == ENABLE)
-		setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
-
-	if (!cpu_has_eager_fpu) {
-		stts();
-		return;
-	}
-
 	if (boot_func) {
 		boot_func();
 		boot_func = NULL;
diff --git a/chromeos/config/armel/chromiumos-ipq806x.flavour.config b/chromeos/config/armel/chromiumos-ipq806x.flavour.config
index f1f421c..838326a 100644
--- a/chromeos/config/armel/chromiumos-ipq806x.flavour.config
+++ b/chromeos/config/armel/chromiumos-ipq806x.flavour.config
@@ -162,25 +162,25 @@
 # CONFIG_BRIDGE_EBT_AMONG is not set
 # CONFIG_BRIDGE_EBT_ARP is not set
 # CONFIG_BRIDGE_EBT_ARPREPLY is not set
-CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_BROUTE=y
 # CONFIG_BRIDGE_EBT_DNAT is not set
-# CONFIG_BRIDGE_EBT_IP is not set
+CONFIG_BRIDGE_EBT_IP=y
 # CONFIG_BRIDGE_EBT_IP6 is not set
 CONFIG_BRIDGE_EBT_LIMIT=m
 CONFIG_BRIDGE_EBT_LOG=m
-CONFIG_BRIDGE_EBT_MARK=m
-CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE_EBT_MARK=y
+CONFIG_BRIDGE_EBT_MARK_T=y
 # CONFIG_BRIDGE_EBT_NFLOG is not set
-# CONFIG_BRIDGE_EBT_PKTTYPE is not set
+CONFIG_BRIDGE_EBT_PKTTYPE=y
 # CONFIG_BRIDGE_EBT_REDIRECT is not set
 # CONFIG_BRIDGE_EBT_SNAT is not set
 # CONFIG_BRIDGE_EBT_STP is not set
-CONFIG_BRIDGE_EBT_T_FILTER=m
-CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_T_FILTER=y
+CONFIG_BRIDGE_EBT_T_NAT=y
 # CONFIG_BRIDGE_EBT_ULOG is not set
 # CONFIG_BRIDGE_EBT_VLAN is not set
 CONFIG_BRIDGE_NETFILTER=y
-CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_NF_EBTABLES=y
 # CONFIG_BRIDGE_VLAN_FILTERING is not set
 CONFIG_BROADCOM_PHY=y
 CONFIG_BT_ATH3K=m
diff --git a/chromeos/config/base.config b/chromeos/config/base.config
index f83d2b1..ce4855a 100644
--- a/chromeos/config/base.config
+++ b/chromeos/config/base.config
@@ -954,7 +954,7 @@
 # CONFIG_NFS_V4_1 is not set
 CONFIG_NF_CONNTRACK=y
 # CONFIG_NF_CONNTRACK_AMANDA is not set
-# CONFIG_NF_CONNTRACK_FTP is not set
+CONFIG_NF_CONNTRACK_FTP=m
 # CONFIG_NF_CONNTRACK_H323 is not set
 CONFIG_NF_CONNTRACK_IPV4=y
 CONFIG_NF_CONNTRACK_IPV6=y
@@ -966,7 +966,7 @@
 CONFIG_NF_CONNTRACK_SECMARK=y
 # CONFIG_NF_CONNTRACK_SIP is not set
 # CONFIG_NF_CONNTRACK_SNMP is not set
-# CONFIG_NF_CONNTRACK_TFTP is not set
+CONFIG_NF_CONNTRACK_TFTP=m
 # CONFIG_NF_CONNTRACK_TIMESTAMP is not set
 CONFIG_NF_CT_NETLINK=y
 # CONFIG_NF_CT_PROTO_DCCP is not set
@@ -977,7 +977,7 @@
 CONFIG_NF_DEFRAG_IPV6=y
 CONFIG_NF_NAT=y
 # CONFIG_NF_NAT_AMANDA is not set
-# CONFIG_NF_NAT_FTP is not set
+CONFIG_NF_NAT_FTP=m
 # CONFIG_NF_NAT_H323 is not set
 CONFIG_NF_NAT_IPV4=y
 CONFIG_NF_NAT_IPV6=m
@@ -986,7 +986,7 @@
 CONFIG_NF_NAT_PPTP=y
 CONFIG_NF_NAT_PROTO_GRE=y
 # CONFIG_NF_NAT_SIP is not set
-# CONFIG_NF_NAT_TFTP is not set
+CONFIG_NF_NAT_TFTP=m
 # CONFIG_NF_TABLES is not set
 # CONFIG_NILFS2_FS is not set
 CONFIG_NL80211_TESTMODE=y
@@ -1165,11 +1165,11 @@
 # CONFIG_RPCSEC_GSS_KRB5 is not set
 CONFIG_RT2800USB=m
 CONFIG_RT2800USB_RT33XX=y
-# CONFIG_RT2800USB_RT3573 is not set
+CONFIG_RT2800USB_RT3573=y
 CONFIG_RT2800USB_RT35XX=y
-# CONFIG_RT2800USB_RT53XX is not set
-# CONFIG_RT2800USB_RT55XX is not set
-# CONFIG_RT2800USB_UNKNOWN is not set
+CONFIG_RT2800USB_RT53XX=y
+CONFIG_RT2800USB_RT55XX=y
+CONFIG_RT2800USB_UNKNOWN=y
 CONFIG_RT2800_LIB=m
 CONFIG_RT2X00=m
 # CONFIG_RT2X00_DEBUG is not set
diff --git a/chromeos/config/x86_64/chromeos-intel-pineview.flavour.config b/chromeos/config/x86_64/chromeos-intel-pineview.flavour.config
index a142c29..84cfb45 100644
--- a/chromeos/config/x86_64/chromeos-intel-pineview.flavour.config
+++ b/chromeos/config/x86_64/chromeos-intel-pineview.flavour.config
@@ -11,10 +11,6 @@
 # CONFIG_FB_CFB_IMAGEBLIT is not set
 # CONFIG_IWL3945 is not set
 # CONFIG_IWL4965 is not set
-CONFIG_MEMCG=y
-# CONFIG_MEMCG_KMEM is not set
-# CONFIG_MEMCG_SWAP is not set
-CONFIG_MM_OWNER=y
 # CONFIG_MOUSE_BCM5974 is not set
 # CONFIG_MOUSE_SYNAPTICS_I2C is not set
 # CONFIG_MOUSE_SYNAPTICS_USB is not set
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index fd069cc..327fbe0 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -25,6 +25,7 @@
 #include <linux/usb.h>
 #include <linux/firmware.h>
 #include <asm/unaligned.h>
+#include <linux/delay.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
@@ -1664,6 +1665,16 @@
 
 	BT_DBG("%s", hdev->name);
 
+	/* A race condition was observed during the controller recovery
+	 * mechanism, leaving the controller unresponsive to the reset command.
+	 *
+	 * To avoid this race, add a 30ms delay right after the USB
+	 * re-enumeration and before sending the Reset command, giving the
+	 * controller time to fully recover and process the Reset command.
+	 */
+	BT_DBG("Delay 30ms to avoid race condition");
+	mdelay(30);
+
 	/* The controller has a bug with the first HCI command sent to it
 	 * returning number of completed commands as zero. This would stall the
 	 * command processing in the Bluetooth core.
@@ -1755,6 +1766,11 @@
 						 &disable_patch);
 		if (ret < 0)
 			goto exit_mfg_deactivate;
+		/* After each memory write the controller needs at least
+		 * 2 ms to recognize that the write has completed before
+		 * it can accept another write.
+		 */
+		mdelay(2);
 	}
 
 	release_firmware(fw);
@@ -1764,7 +1780,10 @@
 
 	/* Patching completed successfully and disable the manufacturer mode
 	 * with reset and activate the downloaded firmware patches.
+	 * 8ms delay - once the firmware patch download is complete, the
+	 * controller needs 8ms to validate the patches.
 	 */
+	mdelay(8);
 	err = btintel_exit_mfg(hdev, true, true);
 	if (err)
 		return err;
@@ -2322,6 +2341,22 @@
 	struct sk_buff *skb;
 	long ret;
 
+	/* In the shutdown sequence where Bluetooth is turned off followed
+	 * by WiFi being turned off, turning WiFi back on causes an issue
+	 * with the RF calibration.
+	 *
+	 * To ensure that any RF activity has been stopped, issue an HCI
+	 * Reset command to clear all ongoing activity, including
+	 * advertising, scanning, etc.
+	 */
+	skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
+	if (IS_ERR(skb)) {
+		ret = PTR_ERR(skb);
+		bt_dev_err(hdev, "HCI reset during shutdown failed");
+		return ret;
+	}
+	kfree_skb(skb);
+
 	/* Some platforms have an issue with BT LED when the interface is
 	 * down or BT radio is turned off, which takes 5 seconds to BT LED
 	 * goes off. This command turns off the BT LED immediately.
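The delays and the extra reset added above all reuse the driver's existing synchronous HCI command pattern: wait for the controller to settle, send the command with __hci_cmd_sync(), check the returned skb for an error pointer, and free it. A minimal sketch of that sequence, assuming a struct hci_dev *hdev in scope (it mirrors the hunks above and is shown only for orientation, not as an additional change):

	struct sk_buff *skb;

	/* Let the controller settle after USB re-enumeration (see 30ms comment above). */
	mdelay(30);

	/* Synchronous HCI Reset; returns the command-complete skb or an ERR_PTR. */
	skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
	if (IS_ERR(skb))
		return PTR_ERR(skb);
	kfree_skb(skb);
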
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 8a3aff7..fddaa87 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2338,7 +2338,7 @@
 	if (!CDROM_CAN(CDC_SELECT_DISC) || arg == CDSL_CURRENT)
 		return media_changed(cdi, 1);
 
-	if ((unsigned int)arg >= cdi->capacity)
+	if (arg >= cdi->capacity)
 		return -EINVAL;
 
 	info = kmalloc(sizeof(*info), GFP_KERNEL);
@@ -2509,7 +2509,7 @@
 	if (!CDROM_CAN(CDC_SELECT_DISC) ||
 	    (arg == CDSL_CURRENT || arg == CDSL_NONE))
 		return cdi->ops->drive_status(cdi, CDSL_CURRENT);
-	if (((int)arg >= cdi->capacity))
+	if (arg >= cdi->capacity)
 		return -EINVAL;
 	return cdrom_slot_status(cdi, arg);
 }
diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild
index 8e3d44c..454756f 100644
--- a/drivers/gpu/arm/midgard/Kbuild
+++ b/drivers/gpu/arm/midgard/Kbuild
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2012-2016, 2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -21,47 +21,31 @@
 
 
 # Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r22p0-01rel0"
+MALI_RELEASE_NAME ?= "r26p0-01rel0"
 
 # Paths required for build
 KBASE_PATH = $(src)
 KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
 UMP_PATH = $(src)/../../../base
 
-ifeq ($(CONFIG_MALI_ERROR_INJECT),y)
-MALI_ERROR_INJECT_ON = 1
-endif
-
 # Set up defaults if not defined by build system
 MALI_CUSTOMER_RELEASE ?= 1
+MALI_USE_CSF ?= 0
 MALI_UNIT_TEST ?= 0
 MALI_KERNEL_TEST_API ?= 0
-MALI_ERROR_INJECT_ON ?= 0
 MALI_MOCK_TEST ?= 0
 MALI_COVERAGE ?= 0
 CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
-# This workaround is for what seems to be a compiler bug we observed in
-# GCC 4.7 on AOSP 4.3.  The bug caused an intermittent failure compiling
-# the "_Pragma" syntax, where an error message is returned:
-#
-# "internal compiler error: unspellable token PRAGMA"
-#
-# This regression has thus far only been seen on the GCC 4.7 compiler bundled
-# with AOSP 4.3.0.  So this makefile, intended for in-tree kernel builds
-# which are not known to be used with AOSP, is hardcoded to disable the
-# workaround, i.e. set the define to 0.
-MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
 
 # Set up our defines, which will be passed to gcc
 DEFINES = \
 	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_USE_CSF=$(MALI_USE_CSF) \
 	-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
 	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
-	-DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
 	-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
 	-DMALI_COVERAGE=$(MALI_COVERAGE) \
-	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
-	-DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
+	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\"
 
 ifeq ($(KBUILD_EXTMOD),)
 # in-tree
@@ -73,6 +57,8 @@
 
 DEFINES += -I$(srctree)/drivers/staging/android
 
+DEFINES += -DMALI_KBASE_BUILD
+
 # Use our defines when compiling
 ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
 subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
@@ -99,7 +85,6 @@
 	mali_kbase_hw.c \
 	mali_kbase_utility.c \
 	mali_kbase_debug.c \
-	mali_kbase_trace_timeline.c \
 	mali_kbase_gpu_memory_debugfs.c \
 	mali_kbase_mem_linux.c \
 	mali_kbase_core_linux.c \
@@ -154,6 +139,10 @@
   endif
 endif
 
+ifeq ($(MALI_USE_CSF),1)
+	include $(src)/csf/Kbuild
+endif
+
 mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \
 	mali_kbase_dma_fence.o \
 	mali_kbase_fence.o
diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig
index 84ad143..af2a5aa 100644
--- a/drivers/gpu/arm/midgard/Kconfig
+++ b/drivers/gpu/arm/midgard/Kconfig
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -58,6 +58,7 @@
 config MALI_DEVFREQ
 	bool "devfreq support for Mali"
 	depends on MALI_MIDGARD && PM_DEVFREQ
+	default y
 	help
 	  Support devfreq for Mali.
 
@@ -107,19 +108,6 @@
 
 	  If unsure, say N.
 
-config MALI_PRFCNT_SET_SECONDARY
-	bool "Use secondary set of performance counters"
-	depends on MALI_MIDGARD && MALI_EXPERT
-	default n
-	help
-	  Select this option to use secondary set of performance counters. Kernel
-	  features that depend on an access to the primary set of counters may
-	  become unavailable. Enabling this option will prevent power management
-	  from working optimally and may cause instrumentation tools to return
-	  bogus results.
-
-	  If unsure, say N.
-
 config MALI_DEBUG
 	bool "Debug build"
 	depends on MALI_MIDGARD && MALI_EXPERT
@@ -163,30 +151,13 @@
 	help
 	  Enables insertion of errors to test module failure and recovery mechanisms.
 
-config MALI_TRACE_TIMELINE
-	bool "Timeline tracing"
-	depends on MALI_MIDGARD && MALI_EXPERT
-	default n
-	help
-	  Enables timeline tracing through the kernel tracepoint system.
-
 config MALI_SYSTEM_TRACE
 	bool "Enable system event tracing support"
 	depends on MALI_MIDGARD && MALI_EXPERT
 	default n
 	help
 	  Choose this option to enable system trace events for each
-	  kbase event.	This is typically used for debugging but has
-	  minimal overhead when not in use. Enable only if you know what
-	  you are doing.
-
-config MALI_JOB_DUMPING
-	bool "Enable system level support needed for job dumping"
-	depends on MALI_MIDGARD && MALI_EXPERT
-	default n
-	help
-	  Choose this option to enable system level support needed for
-	  job dumping.	This is typically used for instrumentation but has
+	  kbase event. This is typically used for debugging but has
 	  minimal overhead when not in use. Enable only if you know what
 	  you are doing.
 
@@ -213,5 +184,30 @@
 	  If using kernel >= v4.10 then say N, otherwise if devfreq cooling
 	  changes have been backported say Y to avoid compilation errors.
 
+# Instrumentation options.
+
+config MALI_JOB_DUMP
+	bool "Enable system level support needed for job dumping"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system level support needed for
+	  job dumping. This is typically used for instrumentation but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_PRFCNT_SET_SECONDARY
+	bool "Use secondary set of performance counters"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to use secondary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, say N.
+
 source "drivers/gpu/arm/midgard/platform/Kconfig"
 source "drivers/gpu/arm/midgard/tests/Kconfig"
diff --git a/drivers/gpu/arm/midgard/Makefile b/drivers/gpu/arm/midgard/Makefile
index cfe6fc3..13af9f4 100644
--- a/drivers/gpu/arm/midgard/Makefile
+++ b/drivers/gpu/arm/midgard/Makefile
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2010-2016, 2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -23,9 +23,7 @@
 KDIR ?= /lib/modules/$(shell uname -r)/build
 
 BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
-UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump
 KBASE_PATH_RELATIVE = $(CURDIR)
-EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers
 
 ifeq ($(MALI_UNIT_TEST), 1)
 	EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
diff --git a/drivers/gpu/arm/midgard/Mconfig b/drivers/gpu/arm/midgard/Mconfig
index 9cfa368..583dec3 100644
--- a/drivers/gpu/arm/midgard/Mconfig
+++ b/drivers/gpu/arm/midgard/Mconfig
@@ -24,7 +24,7 @@
 
 config MALI_GATOR_SUPPORT
 	bool "Streamline support via Gator"
-	depends on MALI_MIDGARD
+	depends on MALI_MIDGARD && !BACKEND_USER
 	default y if INSTRUMENTATION_STREAMLINE_OLD
 	default n
 	help
@@ -84,6 +84,9 @@
 	  include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
 	  exist.
 
+	  When PLATFORM_CUSTOM is set, this needs to be set manually to
+	  pick up the desired platform files.
+
 config MALI_MOCK_TEST
 	bool
 	depends on MALI_MIDGARD && !RELEASE
@@ -112,19 +115,6 @@
 
 	  If unsure, say N.
 
-config MALI_PRFCNT_SET_SECONDARY
-	bool "Use secondary set of performance counters"
-	depends on MALI_MIDGARD && MALI_EXPERT
-	default n
-	help
-	  Select this option to use secondary set of performance counters. Kernel
-	  features that depend on an access to the primary set of counters may
-	  become unavailable. Enabling this option will prevent power management
-	  from working optimally and may cause instrumentation tools to return
-	  bogus results.
-
-	  If unsure, say N.
-
 config MALI_DEBUG
 	bool "Debug build"
 	depends on MALI_MIDGARD && MALI_EXPERT
@@ -164,13 +154,6 @@
 	help
 	  Injected errors are random, rather than user-driven.
 
-config MALI_TRACE_TIMELINE
-	bool "Timeline tracing"
-	depends on MALI_MIDGARD && MALI_EXPERT
-	default n
-	help
-	  Enables timeline tracing through the kernel tracepoint system.
-
 config MALI_SYSTEM_TRACE
 	bool "Enable system event tracing support"
 	depends on MALI_MIDGARD && MALI_EXPERT
@@ -205,3 +188,10 @@
 	  PWRSOFT-765 fixes devfreq cooling devices issues. However, they are
 	  not merged in mainline kernel yet. So this define helps to guard those
 	  parts of the code.
+
+# Instrumentation options.
+
+# config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig.
+# config MALI_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig.
+
+source "kernel/drivers/gpu/arm/midgard/tests/Mconfig"
diff --git a/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/drivers/gpu/arm/midgard/backend/gpu/Kbuild
index bdf4c5a..dcd8ca4 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/Kbuild
+++ b/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2014,2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -30,14 +30,12 @@
 	backend/gpu/mali_kbase_jm_as.c \
 	backend/gpu/mali_kbase_jm_hw.c \
 	backend/gpu/mali_kbase_jm_rb.c \
-	backend/gpu/mali_kbase_js_affinity.c \
 	backend/gpu/mali_kbase_js_backend.c \
 	backend/gpu/mali_kbase_mmu_hw_direct.c \
 	backend/gpu/mali_kbase_pm_backend.c \
 	backend/gpu/mali_kbase_pm_driver.c \
 	backend/gpu/mali_kbase_pm_metrics.c \
 	backend/gpu/mali_kbase_pm_ca.c \
-	backend/gpu/mali_kbase_pm_ca_fixed.c \
 	backend/gpu/mali_kbase_pm_always_on.c \
 	backend/gpu/mali_kbase_pm_coarse_demand.c \
 	backend/gpu/mali_kbase_pm_demand.c \
@@ -46,15 +44,13 @@
 
 ifeq ($(MALI_CUSTOMER_RELEASE),0)
 BACKEND += \
-	backend/gpu/mali_kbase_pm_ca_random.c \
 	backend/gpu/mali_kbase_pm_demand_always_powered.c \
 	backend/gpu/mali_kbase_pm_fast_start.c
 endif
 
 ifeq ($(CONFIG_MALI_DEVFREQ),y)
 BACKEND += \
-	backend/gpu/mali_kbase_devfreq.c \
-	backend/gpu/mali_kbase_pm_ca_devfreq.c
+	backend/gpu/mali_kbase_devfreq.c
 endif
 
 ifeq ($(CONFIG_MALI_NO_MALI),y)
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
index 49567f7..7378bfd 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,6 +29,6 @@
 	kbdev->current_gpu_coherency_mode = mode;
 
 	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
-		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode);
 }
 
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
index c9c463e..450f6e7 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -152,7 +152,7 @@
 	while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
 		kctx->reg_dump[offset+1] =
 				kbase_reg_read(kctx->kbdev,
-						kctx->reg_dump[offset], NULL);
+						kctx->reg_dump[offset]);
 		offset += 2;
 	}
 	return true;
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
index 9c9a0b3..683a24c 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -92,14 +92,21 @@
 
 	freq = *target_freq;
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 	rcu_read_lock();
+#endif
 	opp = devfreq_recommended_opp(dev, &freq, flags);
 	voltage = dev_pm_opp_get_voltage(opp);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 	rcu_read_unlock();
+#endif
 	if (IS_ERR_OR_NULL(opp)) {
 		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
 		return PTR_ERR(opp);
 	}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+	dev_pm_opp_put(opp);
+#endif
 
 	nominal_freq = freq;
 
@@ -141,9 +148,7 @@
 	}
 #endif
 
-	if (kbdev->pm.backend.ca_current_policy->id ==
-			KBASE_PM_CA_POLICY_ID_DEVFREQ)
-		kbase_devfreq_set_core_mask(kbdev, core_mask);
+	kbase_devfreq_set_core_mask(kbdev, core_mask);
 
 	*target_freq = nominal_freq;
 	kbdev->current_voltage = voltage;
@@ -153,8 +158,6 @@
 
 	KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)nominal_freq);
 
-	kbase_pm_reset_dvfs_utilisation(kbdev);
-
 	return err;
 }
 
@@ -172,12 +175,13 @@
 kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
 {
 	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbasep_pm_metrics diff;
 
+	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->last_devfreq_metrics, &diff);
+
+	stat->busy_time = diff.time_busy;
+	stat->total_time = diff.time_busy + diff.time_idle;
 	stat->current_frequency = kbdev->current_nominal_freq;
-
-	kbase_pm_get_dvfs_utilisation(kbdev,
-			&stat->total_time, &stat->busy_time);
-
 	stat->private_data = NULL;
 
 	return 0;
@@ -191,20 +195,24 @@
 	unsigned long freq;
 	struct dev_pm_opp *opp;
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 	rcu_read_lock();
+#endif
 	count = dev_pm_opp_get_opp_count(kbdev->dev);
-	if (count < 0) {
-		rcu_read_unlock();
-		return count;
-	}
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 	rcu_read_unlock();
+#endif
+	if (count < 0)
+		return count;
 
 	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
 				GFP_KERNEL);
 	if (!dp->freq_table)
 		return -ENOMEM;
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 	rcu_read_lock();
+#endif
 	for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) {
 		opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq);
 		if (IS_ERR(opp))
@@ -215,7 +223,9 @@
 
 		dp->freq_table[i] = freq;
 	}
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 	rcu_read_unlock();
+#endif
 
 	if (count != i)
 		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
@@ -247,6 +257,7 @@
 	struct device_node *node;
 	int i = 0;
 	int count;
+	u64 shader_present = kbdev->gpu_props.props.raw_props.shader_present;
 
 	if (!opp_node)
 		return 0;
@@ -271,8 +282,14 @@
 		if (of_property_read_u64(node, "opp-hz-real", &real_freq))
 			real_freq = opp_freq;
 		if (of_property_read_u64(node, "opp-core-mask", &core_mask))
-			core_mask =
-				kbdev->gpu_props.props.raw_props.shader_present;
+			core_mask = shader_present;
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11056) &&
+				core_mask != shader_present) {
+			dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n",
+					opp_freq);
+			continue;
+		}
+
 		core_count_p = of_get_property(node, "opp-core-count", NULL);
 		if (core_count_p) {
 			u64 remaining_core_mask =
@@ -352,7 +369,7 @@
 	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
 				"simple_ondemand", NULL);
 	if (IS_ERR(kbdev->devfreq)) {
-		kbase_devfreq_term_freq_table(kbdev);
+		kfree(dp->freq_table);
 		return PTR_ERR(kbdev->devfreq);
 	}
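The LINUX_VERSION_CODE guards introduced above track the OPP API change in v4.11: before 4.11 the OPP table is RCU-protected, so lookups must sit inside rcu_read_lock()/rcu_read_unlock(); from 4.11 onward the returned dev_pm_opp is reference-counted and must be released with dev_pm_opp_put(). A condensed sketch of the lookup pattern the hunks repeat, assuming this file's existing includes (the helper name is illustrative, and the error check is moved ahead of the voltage read for clarity):

	static int lookup_recommended_opp(struct device *dev, unsigned long *freq,
					  unsigned long *voltage, u32 flags)
	{
		struct dev_pm_opp *opp;

	#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
		rcu_read_lock();		/* pre-4.11: OPP objects are RCU protected */
	#endif
		opp = devfreq_recommended_opp(dev, freq, flags);
		if (!IS_ERR_OR_NULL(opp))
			*voltage = dev_pm_opp_get_voltage(opp);
	#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
		rcu_read_unlock();
	#else
		if (!IS_ERR_OR_NULL(opp))
			dev_pm_opp_put(opp);	/* 4.11+: drop the reference */
	#endif

		return IS_ERR_OR_NULL(opp) ? -ENODEV : 0;
	}
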
 
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
index a0dfd81..ebc30222 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -154,11 +154,9 @@
 #endif /* CONFIG_DEBUG_FS */
 
 
-void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
-						struct kbase_context *kctx)
+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
 {
 	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
-	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
 	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
 
 	writel(value, kbdev->reg + offset);
@@ -168,21 +166,15 @@
 		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
 				value, 1);
 #endif /* CONFIG_DEBUG_FS */
-	dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
-
-	if (kctx && kctx->jctx.tb)
-		kbase_device_trace_register_access(kctx, REG_WRITE, offset,
-									value);
+	dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value);
 }
 
 KBASE_EXPORT_TEST_API(kbase_reg_write);
 
-u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
-						struct kbase_context *kctx)
+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
 {
 	u32 val;
 	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
-	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
 	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
 
 	val = readl(kbdev->reg + offset);
@@ -192,10 +184,8 @@
 		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
 				val, 0);
 #endif /* CONFIG_DEBUG_FS */
-	dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
+	dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val);
 
-	if (kctx && kctx->jctx.tb)
-		kbase_device_trace_register_access(kctx, REG_READ, offset, val);
 	return val;
 }
 
@@ -216,11 +206,11 @@
 	u32 status;
 	u64 address;
 
-	status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
+	status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS));
 	address = (u64) kbase_reg_read(kbdev,
-			GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32;
 	address |= kbase_reg_read(kbdev,
-			GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_LO));
 
 	dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
 			status & 0xFF,
@@ -246,7 +236,7 @@
 		kbase_clean_caches_done(kbdev);
 
 	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
 
 	/* kbase_pm_check_transitions must be called after the IRQ has been
 	 * cleared. This is because it might trigger further power transitions
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
index 729256e..928efe9 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -34,29 +34,21 @@
  * @kbdev:  Kbase device pointer
  * @offset: Offset of register
  * @value:  Value to write
- * @kctx:   Kbase context pointer. May be NULL
  *
- * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
- * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
- * != KBASEP_AS_NR_INVALID).
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false).
  */
-void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
-						struct kbase_context *kctx);
+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value);
 
 /**
  * kbase_reg_read - read from GPU register
  * @kbdev:  Kbase device pointer
  * @offset: Offset of register
- * @kctx:   Kbase context pointer. May be NULL
  *
- * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
- * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
- * != KBASEP_AS_NR_INVALID).
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false).
  *
  * Return: Value in desired register
  */
-u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
-						struct kbase_context *kctx);
+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset);
 
 
 /**
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
index 02dc1ea..39773e6 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -37,60 +37,61 @@
 	int i;
 
 	/* Fill regdump with the content of the relevant registers */
-	regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
+	regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
 
 	regdump->l2_features = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(L2_FEATURES), NULL);
-	regdump->suspend_size = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(SUSPEND_SIZE), NULL);
+				GPU_CONTROL_REG(L2_FEATURES));
+	regdump->core_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(CORE_FEATURES));
 	regdump->tiler_features = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(TILER_FEATURES), NULL);
+				GPU_CONTROL_REG(TILER_FEATURES));
 	regdump->mem_features = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(MEM_FEATURES), NULL);
+				GPU_CONTROL_REG(MEM_FEATURES));
 	regdump->mmu_features = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(MMU_FEATURES), NULL);
+				GPU_CONTROL_REG(MMU_FEATURES));
 	regdump->as_present = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(AS_PRESENT), NULL);
+				GPU_CONTROL_REG(AS_PRESENT));
 	regdump->js_present = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(JS_PRESENT), NULL);
+				GPU_CONTROL_REG(JS_PRESENT));
 
 	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
 		regdump->js_features[i] = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
+				GPU_CONTROL_REG(JS_FEATURES_REG(i)));
 
 	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
 		regdump->texture_features[i] = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
+				GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)));
 
 	regdump->thread_max_threads = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
+				GPU_CONTROL_REG(THREAD_MAX_THREADS));
 	regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
-									NULL);
+				GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE));
 	regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
+				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE));
 	regdump->thread_features = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(THREAD_FEATURES), NULL);
+				GPU_CONTROL_REG(THREAD_FEATURES));
+	regdump->thread_tls_alloc = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_TLS_ALLOC));
 
 	regdump->shader_present_lo = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
+				GPU_CONTROL_REG(SHADER_PRESENT_LO));
 	regdump->shader_present_hi = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
+				GPU_CONTROL_REG(SHADER_PRESENT_HI));
 
 	regdump->tiler_present_lo = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
+				GPU_CONTROL_REG(TILER_PRESENT_LO));
 	regdump->tiler_present_hi = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
+				GPU_CONTROL_REG(TILER_PRESENT_HI));
 
 	regdump->l2_present_lo = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
+				GPU_CONTROL_REG(L2_PRESENT_LO));
 	regdump->l2_present_hi = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
+				GPU_CONTROL_REG(L2_PRESENT_HI));
 
 	regdump->stack_present_lo = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(STACK_PRESENT_LO), NULL);
+				GPU_CONTROL_REG(STACK_PRESENT_LO));
 	regdump->stack_present_hi = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(STACK_PRESENT_HI), NULL);
+				GPU_CONTROL_REG(STACK_PRESENT_HI));
 }
 
 void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
@@ -101,7 +102,7 @@
 		kbase_pm_register_access_enable(kbdev);
 
 		regdump->coherency_features = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+				GPU_CONTROL_REG(COHERENCY_FEATURES));
 
 		/* We're done accessing the GPU registers for now. */
 		kbase_pm_register_access_disable(kbdev);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
index 3cbfb44..6c69132 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -51,16 +51,16 @@
 
 	/* Enable interrupt */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
-	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
-				irq_mask | CLEAN_CACHES_COMPLETED, NULL);
+				irq_mask | CLEAN_CACHES_COMPLETED);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
 
 	/* clean&invalidate the caches so we're sure the mmu tables for the dump
 	 * buffer is valid */
 	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+					GPU_COMMAND_CLEAN_INV_CACHES);
 	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
 
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
@@ -68,20 +68,16 @@
 
 int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 					struct kbase_context *kctx,
-					struct kbase_uk_hwcnt_setup *setup)
+					struct kbase_ioctl_hwcnt_enable *enable)
 {
 	unsigned long flags, pm_flags;
 	int err = -EINVAL;
 	u32 irq_mask;
 	int ret;
-	u64 shader_cores_needed;
 	u32 prfcnt_config;
 
-	shader_cores_needed = kbase_pm_get_present_cores(kbdev,
-							KBASE_PM_CORE_SHADER);
-
 	/* alignment failure */
-	if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
+	if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1)))
 		goto out_err;
 
 	/* Override core availability policy to ensure all cores are available
@@ -90,7 +86,7 @@
 
 	/* Request the cores early on synchronously - we'll release them on any
 	 * errors (e.g. instrumentation already active) */
-	kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
+	kbase_pm_request_cores_sync(kbdev, true, true);
 
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
@@ -102,15 +98,15 @@
 
 	/* Enable interrupt */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
-	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
-						PRFCNT_SAMPLE_COMPLETED, NULL);
+						PRFCNT_SAMPLE_COMPLETED);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
 
 	/* In use, this context is the owner */
 	kbdev->hwcnt.kctx = kctx;
 	/* Remember the dump address so we can reprogram it later */
-	kbdev->hwcnt.addr = setup->dump_buffer;
+	kbdev->hwcnt.addr = enable->dump_buffer;
 
 	/* Request the clean */
 	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
@@ -147,35 +143,34 @@
 #endif
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
-			prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+			prfcnt_config | PRFCNT_CONFIG_MODE_OFF);
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
-					setup->dump_buffer & 0xFFFFFFFF, kctx);
+					enable->dump_buffer & 0xFFFFFFFF);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
-					setup->dump_buffer >> 32,        kctx);
+					enable->dump_buffer >> 32);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
-					setup->jm_bm,                    kctx);
+					enable->jm_bm);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
-					setup->shader_bm,                kctx);
+					enable->shader_bm);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
-					setup->mmu_l2_bm,                kctx);
+					enable->mmu_l2_bm);
 	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
 	 * HW counter dump. */
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
-									kctx);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0);
 	else
 		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
-							setup->tiler_bm, kctx);
+							enable->tiler_bm);
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
-			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
+			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL);
 
 	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
 	 */
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
 		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
-							setup->tiler_bm, kctx);
+							enable->tiler_bm);
 
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
@@ -191,7 +186,7 @@
 	return err;
  out_unrequest_cores:
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
+	kbase_pm_release_cores(kbdev, true, true);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
  out_err:
 	return err;
@@ -234,20 +229,19 @@
 
 	/* Disable interrupt */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
-	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
-				irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
+				irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
 
 	/* Disable the counters */
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0);
 
 	kbdev->hwcnt.kctx = NULL;
 	kbdev->hwcnt.addr = 0ULL;
 
 	kbase_pm_ca_instr_disable(kbdev);
 
-	kbase_pm_unrequest_cores(kbdev, true,
-		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+	kbase_pm_release_cores(kbdev, true, true);
 
 	kbase_pm_release_l2_caches(kbdev);
 
@@ -290,15 +284,15 @@
 
 	/* Reconfigure the dump address */
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
-					kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
+					kbdev->hwcnt.addr & 0xFFFFFFFF);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
-					kbdev->hwcnt.addr >> 32, NULL);
+					kbdev->hwcnt.addr >> 32);
 
 	/* Start dumping */
 	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
 					kbdev->hwcnt.addr, 0);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-					GPU_COMMAND_PRFCNT_SAMPLE, kctx);
+					GPU_COMMAND_PRFCNT_SAMPLE);
 
 	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
 
@@ -376,13 +370,20 @@
 		kbdev->hwcnt.backend.triggered = 1;
 		wake_up(&kbdev->hwcnt.backend.wait);
 	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
-		int ret;
-		/* Always clean and invalidate the cache after a successful dump
-		 */
-		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
-		ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
-					&kbdev->hwcnt.backend.cache_clean_work);
-		KBASE_DEBUG_ASSERT(ret);
+		if (kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) {
+			/* All finished and idle */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+			kbdev->hwcnt.backend.triggered = 1;
+			wake_up(&kbdev->hwcnt.backend.wait);
+		} else {
+			int ret;
+			/* Always clean and invalidate the cache after a successful dump
+			 */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+			ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+						&kbdev->hwcnt.backend.cache_clean_work);
+			KBASE_DEBUG_ASSERT(ret);
+		}
 	}
 
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
@@ -399,10 +400,9 @@
 		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 		/* Disable interrupt */
 		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
-		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
-									NULL);
+		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
 		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
-				irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
+				irq_mask & ~CLEAN_CACHES_COMPLETED);
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
 
 		/* Wakeup... */
@@ -460,7 +460,7 @@
 	/* Clear the counters */
 	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-						GPU_COMMAND_PRFCNT_CLEAR, kctx);
+						GPU_COMMAND_PRFCNT_CLEAR);
 
 	err = 0;
 
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
index 95bebf8..dd0279a 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -58,7 +58,7 @@
 		return IRQ_NONE;
 	}
 
-	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
 
 #ifdef CONFIG_MALI_DEBUG
 	if (!kbdev->pm.backend.driver_ready_for_irqs)
@@ -96,7 +96,7 @@
 
 	atomic_inc(&kbdev->faults_pending);
 
-	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS));
 
 #ifdef CONFIG_MALI_DEBUG
 	if (!kbdev->pm.backend.driver_ready_for_irqs)
@@ -134,7 +134,7 @@
 		return IRQ_NONE;
 	}
 
-	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS));
 
 #ifdef CONFIG_MALI_DEBUG
 	if (!kbdev->pm.backend.driver_ready_for_irqs)
@@ -239,7 +239,7 @@
 		return IRQ_NONE;
 	}
 
-	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
 
 	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
 
@@ -251,7 +251,7 @@
 	kbasep_irq_test_data.triggered = 1;
 	wake_up(&kbasep_irq_test_data.wait);
 
-	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
 
 	return IRQ_HANDLED;
 }
@@ -271,7 +271,7 @@
 		return IRQ_NONE;
 	}
 
-	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS));
 
 	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
 
@@ -283,7 +283,7 @@
 	kbasep_irq_test_data.triggered = 1;
 	wake_up(&kbasep_irq_test_data.wait);
 
-	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val);
 
 	return IRQ_HANDLED;
 }
@@ -327,9 +327,9 @@
 	}
 
 	/* store old mask */
-	old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
+	old_mask_val = kbase_reg_read(kbdev, mask_offset);
 	/* mask interrupts */
-	kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+	kbase_reg_write(kbdev, mask_offset, 0x0);
 
 	if (kbdev->irqs[tag].irq) {
 		/* release original handler and install test handler */
@@ -343,8 +343,8 @@
 						kbasep_test_interrupt_timeout;
 
 			/* trigger interrupt */
-			kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
-			kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
+			kbase_reg_write(kbdev, mask_offset, 0x1);
+			kbase_reg_write(kbdev, rawstat_offset, 0x1);
 
 			hrtimer_start(&kbasep_irq_test_data.timer,
 					HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
@@ -366,7 +366,7 @@
 			kbasep_irq_test_data.triggered = 0;
 
 			/* mask interrupts */
-			kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+			kbase_reg_write(kbdev, mask_offset, 0x0);
 
 			/* release test handler */
 			free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
@@ -382,7 +382,7 @@
 		}
 	}
 	/* restore old mask */
-	kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
+	kbase_reg_write(kbdev, mask_offset, old_mask_val);
 
 	return err;
 }
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
index 4c99152..c8153ba 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -68,11 +68,12 @@
 }
 
 bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
-						struct kbase_context *kctx)
+						struct kbase_context *kctx,
+						int js)
 {
 	int i;
 
-	if (kbdev->hwaccess.active_kctx == kctx) {
+	if (kbdev->hwaccess.active_kctx[js] == kctx) {
 		/* Context is already active */
 		return true;
 	}
@@ -213,12 +214,15 @@
 {
 	struct kbasep_js_device_data *js_devdata;
 	struct kbase_as *new_address_space = NULL;
+	int js;
 
 	js_devdata = &kbdev->js_data;
 
-	if (kbdev->hwaccess.active_kctx == kctx) {
-		WARN(1, "Context is already scheduled in\n");
-		return false;
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		if (kbdev->hwaccess.active_kctx[js] == kctx) {
+			WARN(1, "Context is already scheduled in\n");
+			return false;
+		}
 	}
 
 	new_address_space = &kbdev->as[as_nr];
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
index 27a6ca0..b4d2ae1c 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -113,16 +113,4 @@
 	bool timeouts_updated;
 };
 
-/**
- * struct kbase_jd_atom_backend - GPU backend specific katom data
- */
-struct kbase_jd_atom_backend {
-};
-
-/**
- * struct kbase_context_backend - GPU backend specific context data
- */
-struct kbase_context_backend {
-};
-
 #endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
index 331f6ee..fee19aa 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -37,7 +37,6 @@
 #include <mali_kbase_ctx_sched.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_irq_internal.h>
-#include <backend/gpu/mali_kbase_js_affinity.h>
 #include <backend/gpu/mali_kbase_jm_internal.h>
 
 #define beenthere(kctx, f, a...) \
@@ -52,7 +51,54 @@
 static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
 						struct kbase_context *kctx)
 {
-	return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx);
+	return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT));
+}
+
+static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
+				base_jd_core_req core_req,
+				int js)
+{
+	u64 affinity;
+
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+			BASE_JD_REQ_T) {
+		/* Tiler-only atom */
+		/* If the hardware supports XAFFINITY then we'll only enable
+		 * the tiler (which is the default so this is a no-op),
+		 * otherwise enable shader core 0.
+		 */
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			affinity = 1;
+		else
+			affinity = 0;
+	} else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+			BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+		unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+		struct mali_base_gpu_coherent_group_info *coherency_info =
+			&kbdev->gpu_props.props.coherency_info;
+
+		affinity = kbase_pm_ca_get_core_mask(kbdev) &
+				kbdev->pm.debug_core_mask[js];
+
+		/* JS2 on a dual core group system targets core group 1. All
+		 * other cases target core group 0.
+		 */
+		if (js == 2 && num_core_groups > 1)
+			affinity &= coherency_info->group[1].core_mask;
+		else
+			affinity &= coherency_info->group[0].core_mask;
+	} else {
+		/* Use all cores */
+		affinity = kbase_pm_ca_get_core_mask(kbdev) &
+				kbdev->pm.debug_core_mask[js];
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+					affinity & 0xFFFFFFFF);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+					affinity >> 32);
+
+	return affinity;
 }
 
 void kbase_job_hw_submit(struct kbase_device *kbdev,
@@ -62,6 +108,7 @@
 	struct kbase_context *kctx;
 	u32 cfg;
 	u64 jc_head = katom->jc;
+	u64 affinity;
 
 	KBASE_DEBUG_ASSERT(kbdev);
 	KBASE_DEBUG_ASSERT(katom);
@@ -70,20 +117,13 @@
 
 	/* Command register must be available */
 	KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
-	/* Affinity is not violating */
-	kbase_js_debug_log_current_affinities(kbdev);
-	KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js,
-							katom->affinity));
 
 	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
-						jc_head & 0xFFFFFFFF, kctx);
+						jc_head & 0xFFFFFFFF);
 	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
-						jc_head >> 32, kctx);
+						jc_head >> 32);
 
-	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
-					katom->affinity & 0xFFFFFFFF, kctx);
-	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
-					katom->affinity >> 32, kctx);
+	affinity = kbase_job_write_affinity(kbdev, katom->core_req, js);
 
 	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
 	 * start */
@@ -127,11 +167,11 @@
 		}
 	}
 
-	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg);
 
 	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
 		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
-				katom->flush_id, kctx);
+				katom->flush_id);
 
 	/* Write an approximate start timestamp.
 	 * It's approximate because there might be a job in the HEAD register.
@@ -139,11 +179,11 @@
 	katom->start_timestamp = ktime_get();
 
 	/* GO ! */
-	dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx",
-				katom, kctx, js, jc_head, katom->affinity);
+	dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx",
+				katom, kctx, js, jc_head);
 
 	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
-							(u32) katom->affinity);
+							(u32)affinity);
 
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 	kbase_trace_mali_job_slots_event(
@@ -151,7 +191,7 @@
 				kctx, kbase_jd_atom_id(kctx, katom));
 #endif
 	KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head,
-			katom->affinity, cfg);
+			affinity, cfg);
 	KBASE_TLSTREAM_TL_RET_CTX_LPU(
 		kctx,
 		&kbdev->gpu_props.props.raw_props.js_features[
@@ -174,10 +214,8 @@
 		kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
 	}
 #endif
-	kbase_timeline_job_slot_submit(kbdev, kctx, katom, js);
-
 	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
-						JS_COMMAND_START, katom->kctx);
+						JS_COMMAND_START);
 }
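
One recurring pattern in the submit path above is writing a 64-bit value through a pair of 32-bit registers (the ..._LO half first, then ..._HI). A minimal standalone sketch of that split, with a hypothetical write32() printing in place of a real register write:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical register accessor, for illustration only. */
static void write32(const char *reg, uint32_t val)
{
	printf("%-16s <- 0x%08x\n", reg, val);
}

/* Split a 64-bit job-chain address across the LO/HI register pair. */
static void write_head_next(uint64_t jc_head)
{
	write32("JS_HEAD_NEXT_LO", (uint32_t)(jc_head & 0xFFFFFFFF));
	write32("JS_HEAD_NEXT_HI", (uint32_t)(jc_head >> 32));
}

int main(void)
{
	write_head_next(0x0000000812345680ULL);	/* example address only */
	return 0;
}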
 
 /**
@@ -245,9 +283,6 @@
 
 	KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
 
-	memset(&kbdev->slot_submit_count_irq[0], 0,
-					sizeof(kbdev->slot_submit_count_irq));
-
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 	while (done) {
@@ -272,10 +307,9 @@
 				/* read out the job slot status code if the job
 				 * slot reported failure */
 				completion_code = kbase_reg_read(kbdev,
-					JOB_SLOT_REG(i, JS_STATUS), NULL);
+					JOB_SLOT_REG(i, JS_STATUS));
 
-				switch (completion_code) {
-				case BASE_JD_EVENT_STOPPED:
+				if (completion_code == BASE_JD_EVENT_STOPPED) {
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 					kbase_trace_mali_job_slots_event(
 						GATOR_MAKE_EVENT(
@@ -290,37 +324,27 @@
 					 * JS<n>_TAIL so that the job chain can
 					 * be resumed */
 					job_tail = (u64)kbase_reg_read(kbdev,
-						JOB_SLOT_REG(i, JS_TAIL_LO),
-									NULL) |
+						JOB_SLOT_REG(i, JS_TAIL_LO)) |
 						((u64)kbase_reg_read(kbdev,
-						JOB_SLOT_REG(i, JS_TAIL_HI),
-								NULL) << 32);
-					break;
-				case BASE_JD_EVENT_NOT_STARTED:
+						JOB_SLOT_REG(i, JS_TAIL_HI))
+						 << 32);
+				} else if (completion_code ==
+						BASE_JD_EVENT_NOT_STARTED) {
 					/* PRLAM-10673 can cause a TERMINATED
 					 * job to come back as NOT_STARTED, but
 					 * the error interrupt helps us detect
 					 * it */
 					completion_code =
 						BASE_JD_EVENT_TERMINATED;
-					/* fall through */
-				default:
-					dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
-							i, completion_code,
-							kbase_exception_name
-							(kbdev,
-							completion_code));
 				}
 
-				kbase_gpu_irq_evict(kbdev, i);
+				kbase_gpu_irq_evict(kbdev, i, completion_code);
 			}
 
 			kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
-					done & ((1 << i) | (1 << (i + 16))),
-					NULL);
+					done & ((1 << i) | (1 << (i + 16))));
 			active = kbase_reg_read(kbdev,
-					JOB_CONTROL_REG(JOB_IRQ_JS_STATE),
-					NULL);
+					JOB_CONTROL_REG(JOB_IRQ_JS_STATE));
 
 			if (((active >> i) & 1) == 0 &&
 					(((done >> (i + 16)) & 1) == 0)) {
@@ -365,7 +389,7 @@
 				 * execution.
 				 */
 				u32 rawstat = kbase_reg_read(kbdev,
-					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT));
 
 				if ((rawstat >> (i + 16)) & 1) {
 					/* There is a failed job that we've
@@ -415,7 +439,7 @@
 			}
  spurious:
 			done = kbase_reg_read(kbdev,
-					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT));
 
 			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
 				/* Workaround for missing interrupt caused by
@@ -423,7 +447,7 @@
 				if (((active >> i) & 1) && (0 ==
 						kbase_reg_read(kbdev,
 							JOB_SLOT_REG(i,
-							JS_STATUS), NULL))) {
+							JS_STATUS)))) {
 					/* Force job slot to be processed again
 					 */
 					done |= (1u << i);
@@ -487,7 +511,6 @@
 					base_jd_core_req core_reqs,
 					struct kbase_jd_atom *target_katom)
 {
-	struct kbase_context *kctx = target_katom->kctx;
 #if KBASE_TRACE_ENABLE
 	u32 status_reg_before;
 	u64 job_in_head_before;
@@ -497,12 +520,11 @@
 
 	/* Check the head pointer */
 	job_in_head_before = ((u64) kbase_reg_read(kbdev,
-					JOB_SLOT_REG(js, JS_HEAD_LO), NULL))
+					JOB_SLOT_REG(js, JS_HEAD_LO)))
 			| (((u64) kbase_reg_read(kbdev,
-					JOB_SLOT_REG(js, JS_HEAD_HI), NULL))
+					JOB_SLOT_REG(js, JS_HEAD_HI)))
 									<< 32);
-	status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
-									NULL);
+	status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS));
 #endif
 
 	if (action == JS_COMMAND_SOFT_STOP) {
@@ -606,11 +628,10 @@
 		}
 	}
 
-	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action);
 
 #if KBASE_TRACE_ENABLE
-	status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
-									NULL);
+	status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS));
 	if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
 		struct kbase_jd_atom *head;
 		struct kbase_context *head_kctx;
@@ -745,7 +766,9 @@
 		if (!katom)
 			continue;
 
-		if (katom->kctx != kctx)
+		if ((kbdev->js_ctx_scheduling_mode ==
+			KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) &&
+				(katom->kctx != kctx))
 			continue;
 
 		if (katom->sched_priority > priority) {
@@ -813,7 +836,7 @@
 		mutex_lock(&kbdev->pm.lock);
 		if (kbdev->pm.backend.gpu_powered)
 			flush_id = kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(LATEST_FLUSH), NULL);
+					GPU_CONTROL_REG(LATEST_FLUSH));
 		mutex_unlock(&kbdev->pm.lock);
 	}
 
@@ -1072,34 +1095,32 @@
 
 	dev_err(kbdev->dev, "Register state:");
 	dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)));
 	dev_err(kbdev->dev, "  JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x",
-		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
-		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL));
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE)));
 	for (i = 0; i < 3; i++) {
 		dev_err(kbdev->dev, "  JS%d_STATUS=0x%08x      JS%d_HEAD_LO=0x%08x",
-			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
-					NULL),
-			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
-					NULL));
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS)),
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO)));
 	}
 	dev_err(kbdev->dev, "  MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
-		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)));
 	dev_err(kbdev->dev, "  GPU_IRQ_MASK=0x%08x    JOB_IRQ_MASK=0x%08x     MMU_IRQ_MASK=0x%08x",
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
-		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
-		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)),
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)));
 	dev_err(kbdev->dev, "  PWR_OVERRIDE0=0x%08x   PWR_OVERRIDE1=0x%08x",
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1)));
 	dev_err(kbdev->dev, "  SHADER_CONFIG=0x%08x   L2_MMU_CONFIG=0x%08x",
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)));
 	dev_err(kbdev->dev, "  TILER_CONFIG=0x%08x    JM_CONFIG=0x%08x",
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG), NULL),
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG), NULL));
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)));
 }
 
 static void kbasep_reset_timeout_worker(struct work_struct *data)
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
index d71a9ed..831491e 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -166,4 +166,24 @@
  */
 void kbase_gpu_cacheclean(struct kbase_device *kbdev);
 
+static inline bool kbase_atom_needs_tiler(struct kbase_device *kbdev,
+		base_jd_core_req core_req)
+{
+	return core_req & BASE_JD_REQ_T;
+}
+
+static inline bool kbase_atom_needs_shaders(struct kbase_device *kbdev,
+		base_jd_core_req core_req)
+{
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+		return true;
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+			BASE_JD_REQ_T) {
+		/* Tiler-only atom */
+		return false;
+	}
+
+	return true;
+}
+
 #endif /* _KBASE_JM_HWACCESS_H_ */
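
The two inline helpers added above amount to a small decision table: the tiler is needed exactly when BASE_JD_REQ_T is set, and shaders can only be skipped for a tiler-only atom on hardware with the XAFFINITY feature. A standalone restatement, with hypothetical REQ_* flags in place of the driver constants:

#include <stdbool.h>
#include <stdint.h>

#define REQ_FS (1u << 0)
#define REQ_CS (1u << 1)
#define REQ_T  (1u << 2)

bool needs_tiler(uint32_t core_req)
{
	return (core_req & REQ_T) != 0;
}

bool needs_shaders(uint32_t core_req, bool has_xaffinity)
{
	/* Without XAFFINITY, shader cores are always powered for a job. */
	if (!has_xaffinity)
		return true;

	/* A tiler-only atom can run with no shader cores at all. */
	return (core_req & (REQ_FS | REQ_CS | REQ_T)) != REQ_T;
}

Both helpers feed the kbase_pm_request_cores()/kbase_pm_release_cores() calls later in this patch.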
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
index ee93d4e..79777b7 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -34,7 +34,6 @@
 #include <backend/gpu/mali_kbase_cache_policy_backend.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_jm_internal.h>
-#include <backend/gpu/mali_kbase_js_affinity.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
 /* Return whether the specified ringbuffer is empty. HW access lock must be
@@ -104,8 +103,6 @@
 
 	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;
 
-	kbase_js_debug_log_current_affinities(kbdev);
-
 	return katom;
 }
 
@@ -122,12 +119,6 @@
 	return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
 }
 
-struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
-					int js)
-{
-	return kbase_gpu_inspect(kbdev, js, 0);
-}
-
 struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
 					int js)
 {
@@ -312,221 +303,58 @@
 						int js,
 						struct kbase_jd_atom *katom)
 {
-	/* The most recently checked affinity. Having this at this scope allows
-	 * us to guarantee that we've checked the affinity in this function
-	 * call.
+	base_jd_core_req core_req = katom->core_req;
+
+	/* NOTE: The following uses a number of FALLTHROUGHs to optimize the
+	 * calls to this function. Ending of the function is indicated by BREAK
+	 * OUT.
 	 */
-	u64 recently_chosen_affinity = 0;
-	bool chosen_affinity = false;
-	bool retry;
+	switch (katom->coreref_state) {
+		/* State when job is first attempted to be run */
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Request the cores */
+		kbase_pm_request_cores(kbdev,
+				kbase_atom_needs_tiler(kbdev, core_req),
+				kbase_atom_needs_shaders(kbdev, core_req));
 
-	do {
-		retry = false;
+		/* Proceed to next state */
+		katom->coreref_state =
+		KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
 
-		/* NOTE: The following uses a number of FALLTHROUGHs to optimize
-		 * the calls to this function. Ending of the function is
-		 * indicated by BREAK OUT */
-		switch (katom->coreref_state) {
-			/* State when job is first attempted to be run */
-		case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
-			KBASE_DEBUG_ASSERT(katom->affinity == 0);
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
 
-			/* Compute affinity */
-			if (false == kbase_js_choose_affinity(
-					&recently_chosen_affinity, kbdev, katom,
-									js)) {
-				/* No cores are currently available */
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		{
+			bool cores_ready;
+
+			cores_ready = kbase_pm_cores_requested(kbdev,
+				kbase_atom_needs_tiler(kbdev, core_req),
+				kbase_atom_needs_shaders(kbdev, core_req));
+
+			if (!cores_ready) {
+				/* Stay in this state and return, to retry at
+				 * this state later.
+				 */
+				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+				JS_CORE_REF_REGISTER_INUSE_FAILED,
+						katom->kctx, katom,
+						katom->jc, js,
+						(u32) 0);
 				/* *** BREAK OUT: No state transition *** */
 				break;
 			}
-
-			chosen_affinity = true;
-
-			/* Request the cores */
-			kbase_pm_request_cores(kbdev,
-					katom->core_req & BASE_JD_REQ_T,
-						recently_chosen_affinity);
-
-			katom->affinity = recently_chosen_affinity;
-
 			/* Proceed to next state */
-			katom->coreref_state =
-			KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
-
-			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
-
-		case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
-			{
-				enum kbase_pm_cores_ready cores_ready;
-
-				KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
-					(katom->core_req & BASE_JD_REQ_T));
-
-				cores_ready = kbase_pm_register_inuse_cores(
-						kbdev,
-						katom->core_req & BASE_JD_REQ_T,
-						katom->affinity);
-				if (cores_ready == KBASE_NEW_AFFINITY) {
-					/* Affinity no longer valid - return to
-					 * previous state */
-					kbasep_js_job_check_deref_cores(kbdev,
-									katom);
-					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
-					JS_CORE_REF_REGISTER_INUSE_FAILED,
-							katom->kctx, katom,
-							katom->jc, js,
-							(u32) katom->affinity);
-					/* *** BREAK OUT: Return to previous
-					 * state, retry *** */
-					retry = true;
-					break;
-				}
-				if (cores_ready == KBASE_CORES_NOT_READY) {
-					/* Stay in this state and return, to
-					 * retry at this state later */
-					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
-					JS_CORE_REF_REGISTER_INUSE_FAILED,
-							katom->kctx, katom,
-							katom->jc, js,
-							(u32) katom->affinity);
-					/* *** BREAK OUT: No state transition
-					 * *** */
-					break;
-				}
-				/* Proceed to next state */
-				katom->coreref_state =
-				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
-			}
-
-			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
-
-		case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
-			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
-					(katom->core_req & BASE_JD_REQ_T));
-
-			/* Optimize out choosing the affinity twice in the same
-			 * function call */
-			if (chosen_affinity == false) {
-				/* See if the affinity changed since a previous
-				 * call. */
-				if (false == kbase_js_choose_affinity(
-						&recently_chosen_affinity,
-							kbdev, katom, js)) {
-					/* No cores are currently available */
-					kbasep_js_job_check_deref_cores(kbdev,
-									katom);
-					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
-					JS_CORE_REF_REQUEST_ON_RECHECK_FAILED,
-						katom->kctx, katom,
-						katom->jc, js,
-						(u32) recently_chosen_affinity);
-					/* *** BREAK OUT: Transition to lower
-					 * state *** */
-					break;
-				}
-				chosen_affinity = true;
-			}
-
-			/* Now see if this requires a different set of cores */
-			if (recently_chosen_affinity != katom->affinity) {
-				enum kbase_pm_cores_ready cores_ready;
-
-				kbase_pm_request_cores(kbdev,
-						katom->core_req & BASE_JD_REQ_T,
-						recently_chosen_affinity);
-
-				/* Register new cores whilst we still hold the
-				 * old ones, to minimize power transitions */
-				cores_ready =
-					kbase_pm_register_inuse_cores(kbdev,
-						katom->core_req & BASE_JD_REQ_T,
-						recently_chosen_affinity);
-				kbasep_js_job_check_deref_cores(kbdev, katom);
-
-				/* Fixup the state that was reduced by
-				 * deref_cores: */
-				katom->coreref_state =
-				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
-				katom->affinity = recently_chosen_affinity;
-				if (cores_ready == KBASE_NEW_AFFINITY) {
-					/* Affinity no longer valid - return to
-					 * previous state */
-					katom->coreref_state =
-					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
-
-					kbasep_js_job_check_deref_cores(kbdev,
-									katom);
-
-					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
-					JS_CORE_REF_REGISTER_INUSE_FAILED,
-							katom->kctx, katom,
-							katom->jc, js,
-							(u32) katom->affinity);
-					/* *** BREAK OUT: Return to previous
-					 * state, retry *** */
-					retry = true;
-					break;
-				}
-				/* Now might be waiting for powerup again, with
-				 * a new affinity */
-				if (cores_ready == KBASE_CORES_NOT_READY) {
-					/* Return to previous state */
-					katom->coreref_state =
-					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
-					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
-					JS_CORE_REF_REGISTER_ON_RECHECK_FAILED,
-							katom->kctx, katom,
-							katom->jc, js,
-							(u32) katom->affinity);
-					/* *** BREAK OUT: Transition to lower
-					 * state *** */
-					break;
-				}
-			}
-			/* Proceed to next state */
-			katom->coreref_state =
-			KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS;
-
-			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
-		case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS:
-			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
-					(katom->core_req & BASE_JD_REQ_T));
-			KBASE_DEBUG_ASSERT(katom->affinity ==
-						recently_chosen_affinity);
-
-			/* Note: this is where the caller must've taken the
-			 * hwaccess_lock */
-
-			/* Check for affinity violations - if there are any,
-			 * then we just ask the caller to requeue and try again
-			 * later */
-			if (kbase_js_affinity_would_violate(kbdev, js,
-					katom->affinity) != false) {
-				/* Return to previous state */
-				katom->coreref_state =
-				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
-				/* *** BREAK OUT: Transition to lower state ***
-				 */
-				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
-					JS_CORE_REF_AFFINITY_WOULD_VIOLATE,
-					katom->kctx, katom, katom->jc, js,
-					(u32) katom->affinity);
-				break;
-			}
-
-			/* No affinity violations would result, so the cores are
-			 * ready */
 			katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
 			/* *** BREAK OUT: Cores Ready *** */
 			break;
-
-		default:
-			KBASE_DEBUG_ASSERT_MSG(false,
-					"Unhandled kbase_atom_coreref_state %d",
-							katom->coreref_state);
-			break;
 		}
-	} while (retry != false);
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+				"Unhandled kbase_atom_coreref_state %d",
+				katom->coreref_state);
+		break;
+	}
 
 	return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
 }
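
With the affinity tracking gone, the core-reference path above is reduced to two active states: request the tiler/shader cores, then poll until power management reports them as requested. A reduced sketch of that flow follows; request_cores() and cores_requested() are illustrative stubs, not the kbase PM API:

#include <stdbool.h>

enum coreref_state {
	NO_CORES_REQUESTED,
	WAITING_FOR_REQUESTED_CORES,
	READY,
};

/* Illustrative stubs for kbase_pm_request_cores()/kbase_pm_cores_requested(). */
static void request_cores(bool tiler, bool shaders) { (void)tiler; (void)shaders; }
static bool cores_requested(bool tiler, bool shaders) { (void)tiler; (void)shaders; return true; }

/* Returns true once the atom holds its core references. */
bool check_ref_cores(enum coreref_state *state, bool tiler, bool shaders)
{
	switch (*state) {
	case NO_CORES_REQUESTED:
		request_cores(tiler, shaders);
		*state = WAITING_FOR_REQUESTED_CORES;
		/* fall through */
	case WAITING_FOR_REQUESTED_CORES:
		if (!cores_requested(tiler, shaders))
			break;	/* stay here and retry on a later call */
		*state = READY;
		break;
	default:
		break;
	}

	return *state == READY;
}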
@@ -534,6 +362,8 @@
 static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
 						struct kbase_jd_atom *katom)
 {
+	base_jd_core_req core_req = katom->core_req;
+
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	KBASE_DEBUG_ASSERT(katom != NULL);
 
@@ -541,31 +371,18 @@
 	case KBASE_ATOM_COREREF_STATE_READY:
 		/* State where atom was submitted to the HW - just proceed to
 		 * power-down */
-		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
-					(katom->core_req & BASE_JD_REQ_T));
 
 		/* *** FALLTHROUGH *** */
 
-	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
-		/* State where cores were registered */
-		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
-					(katom->core_req & BASE_JD_REQ_T));
-		kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
-							katom->affinity);
-
-		break;
-
 	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
-		/* State where cores were requested, but not registered */
-		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
-					(katom->core_req & BASE_JD_REQ_T));
-		kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
-							katom->affinity);
+		/* State where cores were requested */
+		kbase_pm_release_cores(kbdev,
+				kbase_atom_needs_tiler(kbdev, core_req),
+				kbase_atom_needs_shaders(kbdev, core_req));
 		break;
 
 	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
 		/* Initial state - nothing required */
-		KBASE_DEBUG_ASSERT(katom->affinity == 0);
 		break;
 
 	default:
@@ -575,12 +392,11 @@
 		break;
 	}
 
-	katom->affinity = 0;
 	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
 }
 
 static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev,
-		base_jd_core_req core_req, u64 affinity,
+		base_jd_core_req core_req,
 		enum kbase_atom_coreref_state coreref_state)
 {
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
@@ -589,31 +405,18 @@
 	case KBASE_ATOM_COREREF_STATE_READY:
 		/* State where atom was submitted to the HW - just proceed to
 		 * power-down */
-		KBASE_DEBUG_ASSERT(affinity != 0 ||
-					(core_req & BASE_JD_REQ_T));
 
 		/* *** FALLTHROUGH *** */
 
-	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
-		/* State where cores were registered */
-		KBASE_DEBUG_ASSERT(affinity != 0 ||
-					(core_req & BASE_JD_REQ_T));
-		kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T,
-							affinity);
-
-		break;
-
 	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
-		/* State where cores were requested, but not registered */
-		KBASE_DEBUG_ASSERT(affinity != 0 ||
-					(core_req & BASE_JD_REQ_T));
-		kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T,
-							affinity);
+		/* State where cores were requested */
+		kbase_pm_release_cores(kbdev,
+				kbase_atom_needs_tiler(kbdev, core_req),
+				kbase_atom_needs_shaders(kbdev, core_req));
 		break;
 
 	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
 		/* Initial state - nothing required */
-		KBASE_DEBUG_ASSERT(affinity == 0);
 		break;
 
 	default:
@@ -659,8 +462,6 @@
 		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
 
 	case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
-		kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
-							katom->affinity);
 		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
 
 	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
@@ -674,14 +475,23 @@
 			kbdev->protected_mode_transition = false;
 
 		if (kbase_jd_katom_is_protected(katom) &&
-				(katom->protected_state.enter ==
-				KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) {
+				((katom->protected_state.enter ==
+				KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) ||
+				 (katom->protected_state.enter ==
+				KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) ||
+				 (katom->protected_state.enter ==
+				KBASE_ATOM_ENTER_PROTECTED_FINISHED))) {
 			kbase_vinstr_resume(kbdev->vinstr_ctx);
-
-			/* Go back to configured model for IPA */
-			kbase_ipa_model_use_configured_locked(kbdev);
 		}
 
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
+			if (katom->atom_flags &
+					KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) {
+				kbdev->l2_users_count--;
+				katom->atom_flags &=
+					~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT;
+			}
+		}
 
 		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
 
@@ -769,6 +579,19 @@
 	return kbdev->protected_mode;
 }
 
+static void kbase_gpu_disable_coherent(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/*
+	 * When entering into protected mode, we must ensure that the
+	 * GPU is not operating in coherent mode as well. This is to
+	 * ensure that no protected memory can be leaked.
+	 */
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE);
+}
+
 static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
 {
 	int err = -EINVAL;
@@ -778,24 +601,18 @@
 	WARN_ONCE(!kbdev->protected_ops,
 			"Cannot enter protected mode: protected callbacks not specified.\n");
 
-	/*
-	 * When entering into protected mode, we must ensure that the
-	 * GPU is not operating in coherent mode as well. This is to
-	 * ensure that no protected memory can be leaked.
-	 */
-	if (kbdev->system_coherency == COHERENCY_ACE)
-		kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE);
-
 	if (kbdev->protected_ops) {
 		/* Switch GPU to protected mode */
 		err = kbdev->protected_ops->protected_mode_enable(
 				kbdev->protected_dev);
 
-		if (err)
+		if (err) {
 			dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n",
 					err);
-		else
+		} else {
 			kbdev->protected_mode = true;
+			kbase_ipa_protection_mode_switch_event(kbdev);
+		}
 	}
 
 	return err;
@@ -818,6 +635,58 @@
 	return 0;
 }
 
+static int kbase_jm_protected_entry(struct kbase_device *kbdev,
+				struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	err = kbase_gpu_protected_mode_enter(kbdev);
+
+	/*
+	 * Regardless of result before this call, we are no longer
+	 * transitioning the GPU.
+	 */
+
+	kbdev->protected_mode_transition = false;
+
+	KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev);
+	if (err) {
+		/*
+		 * Failed to switch into protected mode, resume
+		 * vinstr core and fail atom.
+		 */
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+		katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+		kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+		/*
+		 * Only return if head atom or previous atom
+		 * already removed - as atoms must be returned
+		 * in order.
+		 */
+		if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+		}
+
+		return -EINVAL;
+	}
+
+	/*
+	 * Protected mode sanity checks.
+	 */
+	KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) ==
+			kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]),
+			kbase_gpu_in_protected_mode(kbdev));
+	katom[idx]->gpu_rb_state =
+			KBASE_ATOM_GPU_RB_READY;
+
+	return err;
+}
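
A detail worth noting in kbase_jm_protected_entry() above: on failure the atom is marked invalid, but it is only dequeued immediately when it is the head atom or the head has already been removed, so failed atoms are still returned to the scheduler in order. That ordering rule on its own, as a tiny sketch:

#include <stdbool.h>

/* May the failed atom at ringbuffer index 'idx' be returned right now?
 * (head_removed mirrors katom[0] being in the NOT_IN_SLOT_RB state.) */
bool may_return_now(int idx, bool head_removed)
{
	return idx == 0 || head_removed;
}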
+
 static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
 		struct kbase_jd_atom **katom, int idx, int js)
 {
@@ -848,9 +717,6 @@
 			return -EAGAIN;
 		}
 
-		/* Use generic model for IPA in protected mode */
-		kbase_ipa_model_use_fallback_locked(kbdev);
-
 		/* Once reaching this point GPU must be
 		 * switched to protected mode or vinstr
 		 * re-enabled. */
@@ -873,61 +739,82 @@
 			if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
 				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
 				/*
-				* The L2 is still powered, wait for all the users to
-				* finish with it before doing the actual reset.
-				*/
+				 * The L2 is still powered, wait for all the users to
+				 * finish with it before doing the actual reset.
+				 */
 				return -EAGAIN;
 			}
 		}
 
 		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY:
+		/*
+		 * When entering into protected mode, we must ensure that the
+		 * GPU is not operating in coherent mode as well. This is to
+		 * ensure that no protected memory can be leaked.
+		 */
+		kbase_gpu_disable_coherent(kbdev);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
+			/*
+			 * Power on L2 caches; this will also result in the
+			 * correct value written to coherency enable register.
+			 */
+			kbase_pm_request_l2_caches_nolock(kbdev);
+			/*
+			 * Set the flag on the atom that additional
+			 * L2 references are taken.
+			 */
+			katom[idx]->atom_flags |=
+					KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT;
+		}
+
+		katom[idx]->protected_state.enter =
 			KBASE_ATOM_ENTER_PROTECTED_FINISHED;
 
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234))
+			return -EAGAIN;
+
 		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
 
 	case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
-
-		/* No jobs running, so we can switch GPU mode right now. */
-		err = kbase_gpu_protected_mode_enter(kbdev);
-
-		/*
-		 * Regardless of result, we are no longer transitioning
-		 * the GPU.
-		 */
-		kbdev->protected_mode_transition = false;
-		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev);
-		if (err) {
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
 			/*
-			 * Failed to switch into protected mode, resume
-			 * vinstr core and fail atom.
+			 * Check that L2 caches are powered and, if so,
+			 * enter protected mode.
 			 */
-			kbase_vinstr_resume(kbdev->vinstr_ctx);
-			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
-			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
-			/* Only return if head atom or previous atom
-			 * already removed - as atoms must be returned
-			 * in order. */
-			if (idx == 0 || katom[0]->gpu_rb_state ==
-					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
-				kbase_gpu_dequeue_atom(kbdev, js, NULL);
-				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			if (kbdev->pm.backend.l2_powered != 0) {
+				/*
+				 * Remove additional L2 reference and reset
+				 * the atom flag which denotes it.
+				 */
+				if (katom[idx]->atom_flags &
+					KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) {
+					kbdev->l2_users_count--;
+					katom[idx]->atom_flags &=
+						~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT;
+				}
+
+				err = kbase_jm_protected_entry(kbdev, katom, idx, js);
+
+				if (err)
+					return err;
+			} else {
+				/*
+				 * still waiting for L2 caches to power up
+				 */
+				return -EAGAIN;
 			}
+		} else {
+			err = kbase_jm_protected_entry(kbdev, katom, idx, js);
 
-			/* Go back to configured model for IPA */
-			kbase_ipa_model_use_configured_locked(kbdev);
-
-			return -EINVAL;
+			if (err)
+				return err;
 		}
-
-		/* Protected mode sanity checks. */
-		KBASE_DEBUG_ASSERT_MSG(
-			kbase_jd_katom_is_protected(katom[idx]) ==
-			kbase_gpu_in_protected_mode(kbdev),
-			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
-			kbase_jd_katom_is_protected(katom[idx]),
-			kbase_gpu_in_protected_mode(kbdev));
-		katom[idx]->gpu_rb_state =
-			KBASE_ATOM_GPU_RB_READY;
 	}
 
 	return 0;
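
Stepping back, the entry sequence above now inserts a SET_COHERENCY step between draining the L2 and actually enabling protected mode, and on GPUs with the TGOX_R1_1234 issue it additionally waits for the L2 to power back up before the final switch. A hedged sketch of that ordering, with the states collapsed to a plain enum and the hardware steps stubbed out (none of these names are driver API):

#include <stdbool.h>

enum enter_state {
	ENTER_CHECK,
	ENTER_IDLE_L2,
	ENTER_SET_COHERENCY,
	ENTER_FINISHED,
};

/* Illustrative stubs for the hardware interactions in the real driver. */
static bool l2_idle(void)               { return true; }
static void disable_coherency(void)     { }
static void request_l2(void)            { }
static bool l2_powered(void)            { return true; }
static int  protected_mode_enable(void) { return 0; }

/* One pass of the state machine: returns 1 to retry later, 0 on success,
 * or whatever protected_mode_enable() reports on the final step. */
int enter_protected_step(enum enter_state *state, bool tgox_wa)
{
	switch (*state) {
	case ENTER_CHECK:
		*state = ENTER_IDLE_L2;
		/* fall through */
	case ENTER_IDLE_L2:
		if (!l2_idle())
			return 1;	/* wait for the L2 users to finish */
		*state = ENTER_SET_COHERENCY;
		/* fall through */
	case ENTER_SET_COHERENCY:
		disable_coherency();
		if (tgox_wa)
			request_l2();	/* take an extra L2 reference */
		*state = ENTER_FINISHED;
		if (tgox_wa)
			return 1;	/* come back once the L2 is powered */
		/* fall through */
	case ENTER_FINISHED:
		if (tgox_wa && !l2_powered())
			return 1;
		return protected_mode_enable();
	}

	return 0;
}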
@@ -995,9 +882,6 @@
 
 			kbase_vinstr_resume(kbdev->vinstr_ctx);
 
-			/* Use generic model for IPA in protected mode */
-			kbase_ipa_model_use_fallback_locked(kbdev);
-
 			return -EINVAL;
 		}
 
@@ -1144,8 +1028,6 @@
 				if (!cores_ready)
 					break;
 
-				kbase_js_affinity_retain_slot_cores(kbdev, js,
-							katom[idx]->affinity);
 				katom[idx]->gpu_rb_state =
 					KBASE_ATOM_GPU_RB_WAITING_AFFINITY;
 
@@ -1247,7 +1129,8 @@
 #define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \
 	(KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER))
 
-bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
+				u32 completion_code)
 {
 	struct kbase_jd_atom *katom;
 	struct kbase_jd_atom *next_katom;
@@ -1259,23 +1142,29 @@
 
 	if (next_katom && katom->kctx == next_katom->kctx &&
 		next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
-		HAS_DEP(next_katom) &&
-		(kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL)
+		(HAS_DEP(next_katom) || next_katom->sched_priority ==
+				katom->sched_priority) &&
+		(kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO))
 									!= 0 ||
-		kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL)
+		kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI))
 									!= 0)) {
 		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
-				JS_COMMAND_NOP, NULL);
+				JS_COMMAND_NOP);
 		next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
 
-		KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom,
+		if (completion_code == BASE_JD_EVENT_STOPPED) {
+			KBASE_TLSTREAM_TL_NRET_ATOM_LPU(next_katom,
 				&kbdev->gpu_props.props.raw_props.js_features
-					[katom->slot_nr]);
-		KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as
-					[katom->kctx->as_nr]);
-		KBASE_TLSTREAM_TL_NRET_CTX_LPU(katom->kctx,
+					[next_katom->slot_nr]);
+			KBASE_TLSTREAM_TL_NRET_ATOM_AS(next_katom, &kbdev->as
+					[next_katom->kctx->as_nr]);
+			KBASE_TLSTREAM_TL_NRET_CTX_LPU(next_katom->kctx,
 				&kbdev->gpu_props.props.raw_props.js_features
-					[katom->slot_nr]);
+					[next_katom->slot_nr]);
+		}
+
+		if (next_katom->core_req & BASE_JD_REQ_PERMON)
+			kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
 
 		return true;
 	}
@@ -1314,26 +1203,24 @@
 		 * flushed. To prevent future evictions causing possible memory
 		 * corruption we need to flush the cache manually before any
 		 * affected memory gets reused. */
-		katom->need_cache_flush_cores_retained = katom->affinity;
-		kbase_pm_request_cores(kbdev, false, katom->affinity);
+		katom->need_cache_flush_cores_retained = true;
+		kbase_pm_request_cores(kbdev,
+				kbase_atom_needs_tiler(kbdev, katom->core_req),
+				kbase_atom_needs_shaders(kbdev,
+						katom->core_req));
 	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
 		if (kbdev->gpu_props.num_core_groups > 1 &&
-			!(katom->affinity &
-			kbdev->gpu_props.props.coherency_info.group[0].core_mask
-									) &&
-			(katom->affinity &
-			kbdev->gpu_props.props.coherency_info.group[1].core_mask
-									)) {
+				katom->device_nr >= 1) {
 			dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
-			katom->need_cache_flush_cores_retained =
-								katom->affinity;
-			kbase_pm_request_cores(kbdev, false,
-							katom->affinity);
+			katom->need_cache_flush_cores_retained = true;
+			kbase_pm_request_cores(kbdev,
+				kbase_atom_needs_tiler(kbdev, katom->core_req),
+				kbase_atom_needs_shaders(kbdev,
+						katom->core_req));
 		}
 	}
 
 	katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
-	kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0);
 
 	if (completion_code == BASE_JD_EVENT_STOPPED) {
 		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
@@ -1348,6 +1235,8 @@
 		if (next_katom && katom->kctx == next_katom->kctx &&
 				next_katom->sched_priority ==
 				katom->sched_priority) {
+			WARN_ON(next_katom->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_SUBMITTED);
 			kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
 			kbase_jm_return_atom_to_js(kbdev, next_katom);
 		}
@@ -1355,6 +1244,13 @@
 		struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
 		int i;
 
+		if (!kbase_ctx_flag(katom->kctx, KCTX_DYING))
+			dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
+					js, completion_code,
+					kbase_exception_name
+					(kbdev,
+					completion_code));
+
 #if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
 		KBASE_TRACE_DUMP(kbdev);
 #endif
@@ -1428,10 +1324,6 @@
 	if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
 		katom->event_code = (base_jd_event_code)completion_code;
 
-	kbase_device_trace_register_access(kctx, REG_WRITE,
-						JOB_CONTROL_REG(JOB_IRQ_CLEAR),
-						1 << js);
-
 	/* Complete the job, and start new ones
 	 *
 	 * Also defer remaining work onto the workqueue:
@@ -1515,8 +1407,7 @@
 			if (!katom)
 				break;
 			if (katom->protected_state.exit ==
-					KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)
-			{
+			    KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) {
 				KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev);
 
 				kbase_vinstr_resume(kbdev->vinstr_ctx);
@@ -1544,7 +1435,6 @@
 			if (keep_in_jm_rb) {
 				kbasep_js_job_check_deref_cores(kbdev, katom);
 				katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
-				katom->affinity = 0;
 				katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
 				/* As the atom was not removed, increment the
 				 * index so that we read the correct atom in the
@@ -1607,12 +1497,6 @@
 	return -1;
 }
 
-static void kbase_job_evicted(struct kbase_jd_atom *katom)
-{
-	kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom,
-			katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
-}
-
 bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
 					struct kbase_context *kctx,
 					int js,
@@ -1683,12 +1567,12 @@
 			katom_idx0->kctx->blocked_js[js][prio_idx0] = true;
 		} else {
 			/* katom_idx0 is on GPU */
-			if (katom_idx1 && katom_idx1->gpu_rb_state ==
+			if (katom_idx1_valid && katom_idx1->gpu_rb_state ==
 						KBASE_ATOM_GPU_RB_SUBMITTED) {
 				/* katom_idx0 and katom_idx1 are on GPU */
 
 				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
-						JS_COMMAND_NEXT), NULL) == 0) {
+						JS_COMMAND_NEXT)) == 0) {
 					/* idx0 has already completed - stop
 					 * idx1 if needed*/
 					if (katom_idx1_valid) {
@@ -1703,19 +1587,18 @@
 					kbase_reg_write(kbdev,
 							JOB_SLOT_REG(js,
 							JS_COMMAND_NEXT),
-							JS_COMMAND_NOP, NULL);
+							JS_COMMAND_NOP);
 
 					if (kbase_reg_read(kbdev,
 							JOB_SLOT_REG(js,
-							JS_HEAD_NEXT_LO), NULL)
+							JS_HEAD_NEXT_LO))
 									!= 0 ||
 						kbase_reg_read(kbdev,
 							JOB_SLOT_REG(js,
-							JS_HEAD_NEXT_HI), NULL)
+							JS_HEAD_NEXT_HI))
 									!= 0) {
 						/* idx1 removed successfully,
 						 * will be handled in IRQ */
-						kbase_job_evicted(katom_idx1);
 						kbase_gpu_remove_atom(kbdev,
 								katom_idx1,
 								action, true);
@@ -1769,7 +1652,7 @@
 		} else {
 			/* idx1 is on GPU */
 			if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
-						JS_COMMAND_NEXT), NULL) == 0) {
+						JS_COMMAND_NEXT)) == 0) {
 				/* idx0 has already completed - stop idx1 */
 				kbase_gpu_stop_atom(kbdev, js, katom_idx1,
 									action);
@@ -1779,15 +1662,14 @@
 				 * remove */
 				kbase_reg_write(kbdev, JOB_SLOT_REG(js,
 							JS_COMMAND_NEXT),
-							JS_COMMAND_NOP, NULL);
+							JS_COMMAND_NOP);
 
 				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
-						JS_HEAD_NEXT_LO), NULL) != 0 ||
+						JS_HEAD_NEXT_LO)) != 0 ||
 				    kbase_reg_read(kbdev, JOB_SLOT_REG(js,
-						JS_HEAD_NEXT_HI), NULL) != 0) {
+						JS_HEAD_NEXT_HI)) != 0) {
 					/* idx1 removed successfully, will be
 					 * handled in IRQ once idx0 completes */
-					kbase_job_evicted(katom_idx1);
 					kbase_gpu_remove_atom(kbdev, katom_idx1,
 									action,
 									false);
@@ -1827,11 +1709,11 @@
 	/* clean & invalidate the caches */
 	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+					GPU_COMMAND_CLEAN_INV_CACHES);
 
 	/* wait for cache flush to complete before continuing */
 	while (--max_loops &&
-		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) &
 						CLEAN_CACHES_COMPLETED) == 0)
 		;
 
@@ -1839,7 +1721,7 @@
 	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
 							CLEAN_CACHES_COMPLETED);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
-						CLEAN_CACHES_COMPLETED, NULL);
+						CLEAN_CACHES_COMPLETED);
 	KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
 						KBASE_INSTR_STATE_CLEANING,
 	    "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
@@ -1856,10 +1738,12 @@
 		kbase_gpu_cacheclean(kbdev);
 
 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-		kbase_pm_unrequest_cores(kbdev, false,
-					katom->need_cache_flush_cores_retained);
+		kbase_pm_release_cores(kbdev,
+				kbase_atom_needs_tiler(kbdev, katom->core_req),
+				kbase_atom_needs_shaders(kbdev,
+						katom->core_req));
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-		katom->need_cache_flush_cores_retained = 0;
+		katom->need_cache_flush_cores_retained = false;
 	}
 }
 
@@ -1895,18 +1779,16 @@
 	 * this is not done, then if the atom is re-scheduled (following a soft
 	 * stop) then the core reference would not be retaken. */
 	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
-	katom->affinity = 0;
 }
 
 void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
-		base_jd_core_req core_req, u64 affinity,
+		base_jd_core_req core_req,
 		enum kbase_atom_coreref_state coreref_state)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity,
-			coreref_state);
+	kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, coreref_state);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	if (!kbdev->pm.active_count) {
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
index 4567008..c3b9f2d 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -33,15 +33,17 @@
 /**
  * kbase_gpu_irq_evict - Evict an atom from a NEXT slot
  *
- * @kbdev:         Device pointer
- * @js:            Job slot to evict from
+ * @kbdev:           Device pointer
+ * @js:              Job slot to evict from
+ * @completion_code: Event code from job that was run.
  *
  * Evict the atom in the NEXT slot for the specified job slot. This function is
  * called from the job complete IRQ handler when the previous job has failed.
  *
  * Return: true if job evicted from NEXT registers, false otherwise
  */
-bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js);
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
+				u32 completion_code);
 
 /**
  * kbase_gpu_complete_hw - Complete an atom on job slot js
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
deleted file mode 100644
index c937eca..0000000
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
+++ /dev/null
@@ -1,308 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-
-/*
- * Base kernel affinity manager APIs
- */
-
-#include <mali_kbase.h>
-#include "mali_kbase_js_affinity.h"
-#include "mali_kbase_hw.h"
-
-#include <backend/gpu/mali_kbase_pm_internal.h>
-
-
-bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
-									int js)
-{
-	/*
-	 * Here are the reasons for using job slot 2:
-	 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
-	 * - In absence of the above, then:
-	 *  - Atoms with BASE_JD_REQ_COHERENT_GROUP
-	 *  - But, only when there aren't contexts with
-	 *  KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
-	 *  all cores on slot 1 could be blocked by those using a coherent group
-	 *  on slot 2
-	 *  - And, only when you actually have 2 or more coregroups - if you
-	 *  only have 1 coregroup, then having jobs for slot 2 implies they'd
-	 *  also be for slot 1, meaning you'll get interference from them. Jobs
-	 *  able to run on slot 2 could also block jobs that can only run on
-	 *  slot 1 (tiler jobs)
-	 */
-	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
-		return true;
-
-	if (js != 2)
-		return true;
-
-	/* Only deal with js==2 now: */
-	if (kbdev->gpu_props.num_core_groups > 1) {
-		/* Only use slot 2 in the 2+ coregroup case */
-		if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
-					KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
-								false) {
-			/* ...But only when we *don't* have atoms that run on
-			 * all cores */
-
-			/* No specific check for BASE_JD_REQ_COHERENT_GROUP
-			 * atoms - the policy will sort that out */
-			return true;
-		}
-	}
-
-	/* Above checks failed mean we shouldn't use slot 2 */
-	return false;
-}
-
-/*
- * As long as it has been decided to have a deeper modification of
- * what job scheduler, power manager and affinity manager will
- * implement, this function is just an intermediate step that
- * assumes:
- * - all working cores will be powered on when this is called.
- * - largest current configuration is 2 core groups.
- * - It has been decided not to have hardcoded values so the low
- *   and high cores in a core split will be evently distributed.
- * - Odd combinations of core requirements have been filtered out
- *   and do not get to this function (e.g. CS+T+NSS is not
- *   supported here).
- * - This function is frequently called and can be optimized,
- *   (see notes in loops), but as the functionallity will likely
- *   be modified, optimization has not been addressed.
-*/
-bool kbase_js_choose_affinity(u64 * const affinity,
-					struct kbase_device *kbdev,
-					struct kbase_jd_atom *katom, int js)
-{
-	base_jd_core_req core_req = katom->core_req;
-	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
-	u64 core_availability_mask;
-
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
-
-	/*
-	 * If no cores are currently available (core availability policy is
-	 * transitioning) then fail.
-	 */
-	if (0 == core_availability_mask) {
-		*affinity = 0;
-		return false;
-	}
-
-	KBASE_DEBUG_ASSERT(js >= 0);
-
-	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
-								BASE_JD_REQ_T) {
-		 /* If the hardware supports XAFFINITY then we'll only enable
-		  * the tiler (which is the default so this is a no-op),
-		  * otherwise enable shader core 0. */
-		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
-			*affinity = 1;
-		else
-			*affinity = 0;
-
-		return true;
-	}
-
-	if (1 == kbdev->gpu_props.num_cores) {
-		/* trivial case only one core, nothing to do */
-		*affinity = core_availability_mask &
-				kbdev->pm.debug_core_mask[js];
-	} else {
-		if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
-					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
-			if (js == 0 || num_core_groups == 1) {
-				/* js[0] and single-core-group systems just get
-				 * the first core group */
-				*affinity =
-				kbdev->gpu_props.props.coherency_info.group[0].core_mask
-						& core_availability_mask &
-						kbdev->pm.debug_core_mask[js];
-			} else {
-				/* js[1], js[2] use core groups 0, 1 for
-				 * dual-core-group systems */
-				u32 core_group_idx = ((u32) js) - 1;
-
-				KBASE_DEBUG_ASSERT(core_group_idx <
-							num_core_groups);
-				*affinity =
-				kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
-						& core_availability_mask &
-						kbdev->pm.debug_core_mask[js];
-
-				/* If the job is specifically targeting core
-				 * group 1 and the core availability policy is
-				 * keeping that core group off, then fail */
-				if (*affinity == 0 && core_group_idx == 1 &&
-						kbdev->pm.backend.cg1_disabled
-								== true)
-					katom->event_code =
-							BASE_JD_EVENT_PM_EVENT;
-			}
-		} else {
-			/* All cores are available when no core split is
-			 * required */
-			*affinity = core_availability_mask &
-					kbdev->pm.debug_core_mask[js];
-		}
-	}
-
-	/*
-	 * If no cores are currently available in the desired core group(s)
-	 * (core availability policy is transitioning) then fail.
-	 */
-	if (*affinity == 0)
-		return false;
-
-	/* Enable core 0 if tiler required for hardware without XAFFINITY
-	 * support (notes above) */
-	if (core_req & BASE_JD_REQ_T) {
-		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
-			*affinity = *affinity | 1;
-	}
-
-	return true;
-}
-
-static inline bool kbase_js_affinity_is_violating(
-						struct kbase_device *kbdev,
-								u64 *affinities)
-{
-	/* This implementation checks whether the two slots involved in Generic
-	 * thread creation have intersecting affinity. This is due to micro-
-	 * architectural issues where a job in slot A targetting cores used by
-	 * slot B could prevent the job in slot B from making progress until the
-	 * job in slot A has completed.
-	 */
-	u64 affinity_set_left;
-	u64 affinity_set_right;
-	u64 intersection;
-
-	KBASE_DEBUG_ASSERT(affinities != NULL);
-
-	affinity_set_left = affinities[1];
-
-	affinity_set_right = affinities[2];
-
-	/* A violation occurs when any bit in the left_set is also in the
-	 * right_set */
-	intersection = affinity_set_left & affinity_set_right;
-
-	return (bool) (intersection != (u64) 0u);
-}
-
-bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
-								u64 affinity)
-{
-	struct kbasep_js_device_data *js_devdata;
-	u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
-
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
-	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
-	js_devdata = &kbdev->js_data;
-
-	memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
-			sizeof(js_devdata->runpool_irq.slot_affinities));
-
-	new_affinities[js] |= affinity;
-
-	return kbase_js_affinity_is_violating(kbdev, new_affinities);
-}
-
-void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
-								u64 affinity)
-{
-	struct kbasep_js_device_data *js_devdata;
-	u64 cores;
-
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
-	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
-	js_devdata = &kbdev->js_data;
-
-	KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
-								== false);
-
-	cores = affinity;
-	while (cores) {
-		int bitnum = fls64(cores) - 1;
-		u64 bit = 1ULL << bitnum;
-		s8 cnt;
-
-		cnt =
-		++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
-
-		if (cnt == 1)
-			js_devdata->runpool_irq.slot_affinities[js] |= bit;
-
-		cores &= ~bit;
-	}
-}
-
-void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
-								u64 affinity)
-{
-	struct kbasep_js_device_data *js_devdata;
-	u64 cores;
-
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
-	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
-	js_devdata = &kbdev->js_data;
-
-	cores = affinity;
-	while (cores) {
-		int bitnum = fls64(cores) - 1;
-		u64 bit = 1ULL << bitnum;
-		s8 cnt;
-
-		KBASE_DEBUG_ASSERT(
-		js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
-
-		cnt =
-		--(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
-
-		if (0 == cnt)
-			js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
-
-		cores &= ~bit;
-	}
-}
-
-#if KBASE_TRACE_ENABLE
-void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
-{
-	struct kbasep_js_device_data *js_devdata;
-	int slot_nr;
-
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
-	js_devdata = &kbdev->js_data;
-
-	for (slot_nr = 0; slot_nr < 3; ++slot_nr)
-		KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
-							NULL, 0u, slot_nr,
-			(u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
-}
-#endif				/* KBASE_TRACE_ENABLE  */
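
For readers tracking what the deleted affinity manager enforced, its central rule was that the affinity masks retained by job slots 1 and 2 must never intersect (the micro-architectural constraint described in kbase_js_affinity_is_violating() above). The check restated as a standalone sketch over plain masks:

#include <stdbool.h>
#include <stdint.h>

#define NR_SLOTS 3

/* Would adding 'affinity' to slot 'js' make slots 1 and 2 share a core? */
bool affinity_would_violate(const uint64_t slot_affinities[NR_SLOTS],
			    int js, uint64_t affinity)
{
	uint64_t masks[NR_SLOTS] = {
		slot_affinities[0], slot_affinities[1], slot_affinities[2]
	};

	masks[js] |= affinity;

	/* A violation is any core bit present in both slot 1 and slot 2. */
	return (masks[1] & masks[2]) != 0;
}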
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
deleted file mode 100644
index dbabd94..0000000
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-
-/*
- * Affinity Manager internal APIs.
- */
-
-#ifndef _KBASE_JS_AFFINITY_H_
-#define _KBASE_JS_AFFINITY_H_
-
-/**
- * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
- * submit a job to a particular job slot in the current status
- *
- * @kbdev: The kbase device structure of the device
- * @js:    Job slot number to check for allowance
- *
- * Will check if submitting to the given job slot is allowed in the current
- * status.  For example using job slot 2 while in soft-stoppable state and only
- * having 1 coregroup is not allowed by the policy. This function should be
- * called prior to submitting a job to a slot to make sure policy rules are not
- * violated.
- *
- * The following locking conditions are made on the caller
- * - it must hold hwaccess_lock
- */
-bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js);
-
-/**
- * kbase_js_choose_affinity - Compute affinity for a given job.
- *
- * @affinity: Affinity bitmap computed
- * @kbdev:    The kbase device structure of the device
- * @katom:    Job chain of which affinity is going to be found
- * @js:       Slot the job chain is being submitted
- *
- * Currently assumes an all-on/all-off power management policy.
- * Also assumes there is at least one core with tiler available.
- *
- * Returns true if a valid affinity was chosen, false if
- * no cores were available.
- */
-bool kbase_js_choose_affinity(u64 * const affinity,
-					struct kbase_device *kbdev,
-					struct kbase_jd_atom *katom,
-					int js);
-
-/**
- * kbase_js_affinity_would_violate - Determine whether a proposed affinity on
- * job slot @js would cause a violation of affinity restrictions.
- *
- * @kbdev:    Kbase device structure
- * @js:       The job slot to test
- * @affinity: The affinity mask to test
- *
- * The following locks must be held by the caller
- * - hwaccess_lock
- *
- * Return: true if the affinity would violate the restrictions
- */
-bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
-								u64 affinity);
-
-/**
- * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by
- *                                       a slot
- *
- * @kbdev:    Kbase device structure
- * @js:       The job slot retaining the cores
- * @affinity: The cores to retain
- *
- * The following locks must be held by the caller
- * - hwaccess_lock
- */
-void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
-								u64 affinity);
-
-/**
- * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used
- *                                        by a slot
- *
- * @kbdev:    Kbase device structure
- * @js:       Job slot
- * @affinity: Bit mask of core to be released
- *
- * Cores must be released as soon as a job is dequeued from a slot's 'submit
- * slots', and before another job is submitted to those slots. Otherwise, the
- * refcount could exceed the maximum number submittable to a slot,
- * %BASE_JM_SUBMIT_SLOTS.
- *
- * The following locks must be held by the caller
- * - hwaccess_lock
- */
-void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
-								u64 affinity);
-
-/**
- * kbase_js_debug_log_current_affinities - log the current affinities
- *
- * @kbdev:  Kbase device structure
- *
- * Output to the Trace log the current tracked affinities on all slots
- */
-#if KBASE_TRACE_ENABLE
-void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
-#else				/*  KBASE_TRACE_ENABLE  */
-static inline void
-kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
-{
-}
-#endif				/*  KBASE_TRACE_ENABLE  */
-
-#endif				/* _KBASE_JS_AFFINITY_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
index 2dc97859..df2dd5ec 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -147,16 +147,17 @@
 
 				/* Job is Soft-Stoppable */
 				if (ticks == soft_stop_ticks) {
-					int disjoint_threshold =
-		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
-					u32 softstop_flags = 0u;
 					/* Job has been scheduled for at least
 					 * js_devdata->soft_stop_ticks ticks.
 					 * Soft stop the slot so we can run
 					 * other jobs.
 					 */
-					dev_dbg(kbdev->dev, "Soft-stop");
 #if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+					int disjoint_threshold =
+		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+					u32 softstop_flags = 0u;
+
+					dev_dbg(kbdev->dev, "Soft-stop");
 					/* nr_user_contexts_running is updated
 					 * with the runpool_mutex, but we can't
 					 * take that here.
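
The hunk above only moves the two locals inside the #if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS guard, so they are declared (and the debug print issued) only when the soft-stop path is compiled in. A minimal standalone sketch of the same pattern, using hypothetical names rather than the driver's:

#include <stdio.h>

#define FEATURE_ENABLED 1	/* hypothetical compile-time switch */

static void handle_tick(int ticks, int threshold)
{
	if (ticks == threshold) {
#if FEATURE_ENABLED
		/* Declared inside the guard, so there is no unused-variable
		 * warning when FEATURE_ENABLED is 0 and the block is
		 * compiled out. */
		unsigned int flags = 0u;

		printf("soft-stop, flags=%u\n", flags);
#endif
	}
}

int main(void)
{
	handle_tick(3, 3);
	return 0;
}
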
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
index ad27202..3e9af77 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -66,15 +66,15 @@
 }
 
 static int wait_ready(struct kbase_device *kbdev,
-		unsigned int as_nr, struct kbase_context *kctx)
+		unsigned int as_nr)
 {
 	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
-	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
 
 	/* Wait for the MMU status to indicate there is no active command, in
 	 * case one is pending. Do not log remaining register accesses. */
 	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
-		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
+		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
 
 	if (max_loops == 0) {
 		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
@@ -83,27 +83,24 @@
 
 	/* If waiting in loop was performed, log last read value. */
 	if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
-		kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+		kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
 
 	return 0;
 }
 
-static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
-		struct kbase_context *kctx)
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
 {
 	int status;
 
 	/* write AS_COMMAND when MMU is ready to accept another command */
-	status = wait_ready(kbdev, as_nr, kctx);
+	status = wait_ready(kbdev, as_nr);
 	if (status == 0)
-		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd,
-									kctx);
+		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd);
 
 	return status;
 }
 
-static void validate_protected_page_fault(struct kbase_device *kbdev,
-		struct kbase_context *kctx)
+static void validate_protected_page_fault(struct kbase_device *kbdev)
 {
 	/* GPUs which support (native) protected mode shall not report page
 	 * fault addresses unless it has protected debug mode and protected
@@ -115,8 +112,7 @@
 
 	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
 		protected_debug_mode = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(GPU_STATUS),
-				kctx) & GPU_DBGEN;
+				GPU_CONTROL_REG(GPU_STATUS)) & GPU_DBGEN;
 	}
 
 	if (!protected_debug_mode) {
@@ -145,9 +141,9 @@
 
 	/* remember current mask */
 	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
-	new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
 	/* mask interrupts for now */
-	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0);
 	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
 
 	while (bf_bits | pf_bits) {
@@ -170,25 +166,21 @@
 		 */
 		kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
 
-
 		/* find faulting address */
 		as->fault_addr = kbase_reg_read(kbdev,
 						MMU_AS_REG(as_no,
-							AS_FAULTADDRESS_HI),
-						kctx);
+							AS_FAULTADDRESS_HI));
 		as->fault_addr <<= 32;
 		as->fault_addr |= kbase_reg_read(kbdev,
 						MMU_AS_REG(as_no,
-							AS_FAULTADDRESS_LO),
-						kctx);
+							AS_FAULTADDRESS_LO));
 
 		/* Mark the fault protected or not */
 		as->protected_mode = kbdev->protected_mode;
 
-		if (kbdev->protected_mode && as->fault_addr)
-		{
+		if (kbdev->protected_mode && as->fault_addr) {
 			/* check if address reporting is allowed */
-			validate_protected_page_fault(kbdev, kctx);
+			validate_protected_page_fault(kbdev);
 		}
 
 		/* report the fault to debugfs */
@@ -197,8 +189,7 @@
 		/* record the fault status */
 		as->fault_status = kbase_reg_read(kbdev,
 						  MMU_AS_REG(as_no,
-							AS_FAULTSTATUS),
-						  kctx);
+							AS_FAULTSTATUS));
 
 		/* find the fault type */
 		as->fault_type = (bf_bits & (1 << as_no)) ?
@@ -207,12 +198,10 @@
 
 		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
 			as->fault_extra_addr = kbase_reg_read(kbdev,
-					MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
-					kctx);
+					MMU_AS_REG(as_no, AS_FAULTEXTRA_HI));
 			as->fault_extra_addr <<= 32;
 			as->fault_extra_addr |= kbase_reg_read(kbdev,
-					MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
-					kctx);
+					MMU_AS_REG(as_no, AS_FAULTEXTRA_LO));
 		}
 
 		if (kbase_as_has_bus_fault(as)) {
@@ -241,14 +230,13 @@
 
 	/* reenable interrupts */
 	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
-	tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
 	new_mask |= tmp;
-	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask);
 	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
 }
 
-void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
-		struct kbase_context *kctx)
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
 {
 	struct kbase_mmu_setup *current_setup = &as->current_setup;
 	u32 transcfg = 0;
@@ -271,35 +259,34 @@
 		}
 
 		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
-				transcfg, kctx);
+				transcfg);
 		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
-				(current_setup->transcfg >> 32) & 0xFFFFFFFFUL,
-				kctx);
+				(current_setup->transcfg >> 32) & 0xFFFFFFFFUL);
 	} else {
 		if (kbdev->system_coherency == COHERENCY_ACE)
 			current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
 	}
 
 	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
-			current_setup->transtab & 0xFFFFFFFFUL, kctx);
+			current_setup->transtab & 0xFFFFFFFFUL);
 	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
-			(current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
+			(current_setup->transtab >> 32) & 0xFFFFFFFFUL);
 
 	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
-			current_setup->memattr & 0xFFFFFFFFUL, kctx);
+			current_setup->memattr & 0xFFFFFFFFUL);
 	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
-			(current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
+			(current_setup->memattr >> 32) & 0xFFFFFFFFUL);
 
 	KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as,
 			current_setup->transtab,
 			current_setup->memattr,
 			transcfg);
 
-	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
+	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE);
 }
 
 int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
-		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
+		u64 vpfn, u32 nr, u32 op,
 		unsigned int handling_irq)
 {
 	int ret;
@@ -308,22 +295,22 @@
 
 	if (op == AS_COMMAND_UNLOCK) {
 		/* Unlock doesn't require a lock first */
-		ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+		ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
 	} else {
 		u64 lock_addr = lock_region(kbdev, vpfn, nr);
 
 		/* Lock the region that needs to be updated */
 		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
-				lock_addr & 0xFFFFFFFFUL, kctx);
+				lock_addr & 0xFFFFFFFFUL);
 		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
-				(lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
-		write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+				(lock_addr >> 32) & 0xFFFFFFFFUL);
+		write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
 
 		/* Run the MMU operation */
-		write_cmd(kbdev, as->number, op, kctx);
+		write_cmd(kbdev, as->number, op);
 
 		/* Wait for the flush to complete */
-		ret = wait_ready(kbdev, as->number, kctx);
+		ret = wait_ready(kbdev, as->number);
 
 		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
 			/* Issue an UNLOCK command to ensure that valid page
@@ -340,8 +327,8 @@
 			   commands in order to flush the MMU/uTLB,
 			   see PRLAM-8812.
 			 */
-			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
-			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
 		}
 	}
 
@@ -349,7 +336,7 @@
 }
 
 void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
-		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+		enum kbase_mmu_fault_type type)
 {
 	unsigned long flags;
 	u32 pf_bf_mask;
@@ -369,14 +356,14 @@
 			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
 		pf_bf_mask |= MMU_BUS_ERROR(as->number);
 
-	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask);
 
 unlock:
 	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
 }
 
 void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
-		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+		enum kbase_mmu_fault_type type)
 {
 	unsigned long flags;
 	u32 irq_mask;
@@ -392,14 +379,14 @@
 	if (kbdev->irq_reset_flush)
 		goto unlock;
 
-	irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
+	irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) |
 			MMU_PAGE_FAULT(as->number);
 
 	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
 			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
 		irq_mask |= MMU_BUS_ERROR(as->number);
 
-	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask);
 
 unlock:
 	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
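
Throughout this file the kctx argument disappears from the register helpers, so MMU programming now needs only the device, the register and the value or command. A hedged sketch of the resulting call shape, written as if it were another static helper inside mali_kbase_mmu_hw_direct.c (it reuses the file-local write_cmd()/wait_ready() shown above and is illustrative only, not part of the patch):

/* Sketch: program a new translation table base for an address space and
 * issue UPDATE, using only the signatures visible in this hunk. */
static int update_transtab_sketch(struct kbase_device *kbdev,
		struct kbase_as *as, u64 transtab)
{
	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
			transtab & 0xFFFFFFFFUL);
	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
			(transtab >> 32) & 0xFFFFFFFFUL);

	/* Kick the MMU and wait for the address space to go idle again. */
	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE);
	return wait_ready(kbdev, as->number);
}
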
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
index 2ed7dfd..51a10a2 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2015,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,9 +29,9 @@
 #include <mali_kbase.h>
 #include <mali_kbase_pm.h>
 
-static u64 always_on_get_core_mask(struct kbase_device *kbdev)
+static bool always_on_shaders_needed(struct kbase_device *kbdev)
 {
-	return kbdev->gpu_props.props.raw_props.shader_present;
+	return true;
 }
 
 static bool always_on_get_core_active(struct kbase_device *kbdev)
@@ -59,7 +59,7 @@
 	"always_on",			/* name */
 	always_on_init,			/* init */
 	always_on_term,			/* term */
-	always_on_get_core_mask,	/* get_core_mask */
+	always_on_shaders_needed,	/* shaders_needed */
 	always_on_get_core_active,	/* get_core_active */
 	0u,				/* flags */
 	KBASE_PM_POLICY_ID_ALWAYS_ON,	/* id */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
index d61d0d0e..e7927cf 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
@@ -1,7 +1,6 @@
-
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -37,13 +36,13 @@
  *
  * - When KBase indicates that the GPU will be powered up, but we don't yet
  *   know which Job Chains are to be run:
- *    All Shader Cores are powered up, regardless of whether or not they will
- *    be needed later.
+ *    Shader Cores are powered up, regardless of whether or not they will be
+ *    needed later.
  *
- * - When KBase indicates that a set of Shader Cores are needed to submit the
- *   currently queued Job Chains:
- *    All Shader Cores are kept powered, regardless of whether or not they will
- *    be needed
+ * - When KBase indicates that Shader Cores are needed to submit the currently
+ *   queued Job Chains:
+ *    Shader Cores are kept powered, regardless of whether or not they will be
+ *    needed
  *
  * - When KBase indicates that the GPU need not be powered:
  *    The Shader Cores are kept powered, regardless of whether or not they will
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
index 0d899cc..a448a3b 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -179,11 +179,7 @@
 	kbase_pm_clock_on(kbdev, is_resume);
 
 	/* Update core status as required by the policy */
-	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
-				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
 	kbase_pm_update_cores_state(kbdev);
-	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
-				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
 
 	/* NOTE: We don't wait to reach the desired state, since running atoms
 	 * will wait for that state to be reached anyway */
@@ -201,11 +197,7 @@
 #if !PLATFORM_POWER_DOWN_ONLY
 	/* Wait for power transitions to complete. We do this with no locks held
 	 * so that we don't deadlock with any pending workqueues */
-	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
-				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
 	kbase_pm_check_transitions_sync(kbdev);
-	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
-				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
 #endif /* !PLATFORM_POWER_DOWN_ONLY */
 
 	mutex_lock(&js_devdata->runpool_mutex);
@@ -233,10 +225,6 @@
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 #endif /* !PLATFORM_POWER_DOWN_ONLY */
 
-		/* Consume any change-state events */
-		kbase_timeline_pm_check_handle_event(kbdev,
-					KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
-
 		/* Disable interrupts and turn the clock off */
 		if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
 			/*
@@ -252,7 +240,10 @@
 
 			/* Turn off clock now that fault have been handled. We
 			 * dropped locks so poweron_required may have changed -
-			 * power back on if this is the case.*/
+			 * power back on if this is the case (effectively only
+			 * the interrupts are re-enabled in this case, as the
+			 * clocks to the GPU had not yet been withdrawn).
+			 */
 			if (backend->poweron_required)
 				kbase_pm_clock_on(kbdev, false);
 			else
@@ -422,21 +413,12 @@
 	bool cores_are_available;
 	unsigned long flags;
 
-	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
-				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
-	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
-				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
 
-	if (cores_are_available) {
-		/* Log timelining information that a change in state has
-		 * completed */
-		kbase_timeline_pm_handle_event(kbdev,
-				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
-
+	if (cores_are_available)
 		kbase_backend_slot_update(kbdev);
-	}
+
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
index 5b369fb..d4e8e42 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -28,145 +28,65 @@
 #include <mali_kbase_pm.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
-static const struct kbase_pm_ca_policy *const policy_list[] = {
-	&kbase_pm_ca_fixed_policy_ops,
-#ifdef CONFIG_MALI_DEVFREQ
-	&kbase_pm_ca_devfreq_policy_ops,
-#endif
-#if !MALI_CUSTOMER_RELEASE
-	&kbase_pm_ca_random_policy_ops
-#endif
-};
-
-/**
- * POLICY_COUNT - The number of policies available in the system.
- *
- * This is derived from the number of functions listed in policy_list.
- */
-#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
-
 int kbase_pm_ca_init(struct kbase_device *kbdev)
 {
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
-
-	kbdev->pm.backend.ca_current_policy = policy_list[0];
-
-	kbdev->pm.backend.ca_current_policy->init(kbdev);
+	struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
+#ifdef CONFIG_MALI_DEVFREQ
+	if (kbdev->current_core_mask)
+		pm_backend->ca_cores_enabled = kbdev->current_core_mask;
+	else
+		pm_backend->ca_cores_enabled =
+				kbdev->gpu_props.props.raw_props.shader_present;
+#endif
+	pm_backend->ca_in_transition = false;
 
 	return 0;
 }
 
 void kbase_pm_ca_term(struct kbase_device *kbdev)
 {
-	kbdev->pm.backend.ca_current_policy->term(kbdev);
 }
 
-int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+#ifdef CONFIG_MALI_DEVFREQ
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
 {
-	if (!list)
-		return POLICY_COUNT;
-
-	*list = policy_list;
-
-	return POLICY_COUNT;
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
-
-const struct kbase_pm_ca_policy
-*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
-{
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
-
-	return kbdev->pm.backend.ca_current_policy;
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
-
-void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
-				const struct kbase_pm_ca_policy *new_policy)
-{
-	const struct kbase_pm_ca_policy *old_policy;
+	struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
 	unsigned long flags;
 
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
-	KBASE_DEBUG_ASSERT(new_policy != NULL);
-
-	KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
-								new_policy->id);
-
-	/* During a policy change we pretend the GPU is active */
-	/* A suspend won't happen here, because we're in a syscall from a
-	 * userspace thread */
-	kbase_pm_context_active(kbdev);
-
-	mutex_lock(&kbdev->pm.lock);
-
-	/* Remove the policy to prevent IRQ handlers from working on it */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	old_policy = kbdev->pm.backend.ca_current_policy;
-	kbdev->pm.backend.ca_current_policy = NULL;
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
-	if (old_policy->term)
-		old_policy->term(kbdev);
+	pm_backend->ca_cores_enabled = core_mask;
 
-	if (new_policy->init)
-		new_policy->init(kbdev);
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	kbdev->pm.backend.ca_current_policy = new_policy;
-
-	/* If any core power state changes were previously attempted, but
-	 * couldn't be made because the policy was changing (current_policy was
-	 * NULL), then re-try them here. */
 	kbase_pm_update_cores_state_nolock(kbdev);
 
-	kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
-					kbdev->shader_ready_bitmap,
-					kbdev->shader_transitioning_bitmap);
-
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
-	mutex_unlock(&kbdev->pm.lock);
-
-	/* Now the policy change is finished, we release our fake context active
-	 * reference */
-	kbase_pm_context_idle(kbdev);
+	dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n",
+			pm_backend->ca_cores_enabled);
 }
-
-KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
+#endif
 
 u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
 {
+	struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
+
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
 	/* All cores must be enabled when instrumentation is in use */
-	if (kbdev->pm.backend.instr_enabled)
+	if (pm_backend->instr_enabled)
 		return kbdev->gpu_props.props.raw_props.shader_present &
 				kbdev->pm.debug_core_mask_all;
 
-	if (kbdev->pm.backend.ca_current_policy == NULL)
-		return kbdev->gpu_props.props.raw_props.shader_present &
-				kbdev->pm.debug_core_mask_all;
-
-	return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
-						kbdev->pm.debug_core_mask_all;
+#ifdef CONFIG_MALI_DEVFREQ
+	return pm_backend->ca_cores_enabled & kbdev->pm.debug_core_mask_all;
+#else
+	return kbdev->gpu_props.props.raw_props.shader_present &
+			kbdev->pm.debug_core_mask_all;
+#endif
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
 
-void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
-							u64 cores_transitioning)
-{
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	if (kbdev->pm.backend.ca_current_policy != NULL)
-		kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
-							cores_ready,
-							cores_transitioning);
-}
-
 void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
 {
 	unsigned long flags;
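
With the CA policy machinery gone, kbase_devfreq_set_core_mask() becomes a thin setter: it records the requested mask under hwaccess_lock and re-runs the core state machine, and kbase_pm_ca_get_core_mask() simply intersects it with the debug mask. A hedged sketch of a caller, for instance a devfreq handler mapping an OPP to a core mask; opp_core_mask() is hypothetical, and only kbase_devfreq_set_core_mask() itself comes from this patch (CONFIG_MALI_DEVFREQ builds only):

static void apply_opp_sketch(struct kbase_device *kbdev, unsigned int opp_idx)
{
	/* Hypothetical helper: look up the shader core mask associated
	 * with the selected operating point. */
	u64 core_mask = opp_core_mask(kbdev, opp_idx);

	/* Record the new availability; the backend picks it up through
	 * kbase_pm_ca_get_core_mask() on the next core-state update. */
	kbase_devfreq_set_core_mask(kbdev, core_mask);
}
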
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c
deleted file mode 100644
index 4bb4c40..0000000
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-/*
- * A core availability policy implementing core mask selection from devfreq OPPs
- *
- */
-
-#include <mali_kbase.h>
-#include <mali_kbase_pm.h>
-#include <backend/gpu/mali_kbase_pm_internal.h>
-#include <linux/version.h>
-
-void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
-{
-	struct kbasep_pm_ca_policy_devfreq *data =
-				&kbdev->pm.backend.ca_policy_data.devfreq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-	data->cores_desired = core_mask;
-
-	/* Disable any cores that are now unwanted */
-	data->cores_enabled &= data->cores_desired;
-
-	kbdev->pm.backend.ca_in_transition = true;
-
-	/* If there are no cores to be powered off then power on desired cores
-	 */
-	if (!(data->cores_used & ~data->cores_desired)) {
-		data->cores_enabled = data->cores_desired;
-		kbdev->pm.backend.ca_in_transition = false;
-	}
-
-	kbase_pm_update_cores_state_nolock(kbdev);
-
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-	dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX %llX\n",
-				data->cores_desired, data->cores_enabled);
-}
-
-static void devfreq_init(struct kbase_device *kbdev)
-{
-	struct kbasep_pm_ca_policy_devfreq *data =
-				&kbdev->pm.backend.ca_policy_data.devfreq;
-
-	if (kbdev->current_core_mask) {
-		data->cores_enabled = kbdev->current_core_mask;
-		data->cores_desired = kbdev->current_core_mask;
-	} else {
-		data->cores_enabled =
-				kbdev->gpu_props.props.raw_props.shader_present;
-		data->cores_desired =
-				kbdev->gpu_props.props.raw_props.shader_present;
-	}
-	data->cores_used = 0;
-	kbdev->pm.backend.ca_in_transition = false;
-}
-
-static void devfreq_term(struct kbase_device *kbdev)
-{
-}
-
-static u64 devfreq_get_core_mask(struct kbase_device *kbdev)
-{
-	return kbdev->pm.backend.ca_policy_data.devfreq.cores_enabled;
-}
-
-static void devfreq_update_core_status(struct kbase_device *kbdev,
-							u64 cores_ready,
-							u64 cores_transitioning)
-{
-	struct kbasep_pm_ca_policy_devfreq *data =
-				&kbdev->pm.backend.ca_policy_data.devfreq;
-
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	data->cores_used = cores_ready | cores_transitioning;
-
-	/* If in desired state then clear transition flag */
-	if (data->cores_enabled == data->cores_desired)
-		kbdev->pm.backend.ca_in_transition = false;
-
-	/* If all undesired cores are now off then power on desired cores.
-	 * The direct comparison against cores_enabled limits potential
-	 * recursion to one level */
-	if (!(data->cores_used & ~data->cores_desired) &&
-				data->cores_enabled != data->cores_desired) {
-		data->cores_enabled = data->cores_desired;
-
-		kbase_pm_update_cores_state_nolock(kbdev);
-
-		kbdev->pm.backend.ca_in_transition = false;
-	}
-}
-
-/*
- * The struct kbase_pm_ca_policy structure for the devfreq core availability
- * policy.
- *
- * This is the static structure that defines the devfreq core availability power
- * policy's callback and name.
- */
-const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops = {
-	"devfreq",			/* name */
-	devfreq_init,			/* init */
-	devfreq_term,			/* term */
-	devfreq_get_core_mask,		/* get_core_mask */
-	devfreq_update_core_status,	/* update_core_status */
-	0u,				/* flags */
-	KBASE_PM_CA_POLICY_ID_DEVFREQ,	/* id */
-};
-
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
deleted file mode 100644
index 1eea7e8..0000000
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-/*
- * A power policy implementing fixed core availability
- */
-
-#include <mali_kbase.h>
-#include <mali_kbase_pm.h>
-
-static void fixed_init(struct kbase_device *kbdev)
-{
-	kbdev->pm.backend.ca_in_transition = false;
-}
-
-static void fixed_term(struct kbase_device *kbdev)
-{
-	CSTD_UNUSED(kbdev);
-}
-
-static u64 fixed_get_core_mask(struct kbase_device *kbdev)
-{
-	return kbdev->gpu_props.props.raw_props.shader_present;
-}
-
-static void fixed_update_core_status(struct kbase_device *kbdev,
-					u64 cores_ready,
-					u64 cores_transitioning)
-{
-	CSTD_UNUSED(kbdev);
-	CSTD_UNUSED(cores_ready);
-	CSTD_UNUSED(cores_transitioning);
-}
-
-/*
- * The struct kbase_pm_policy structure for the fixed power policy.
- *
- * This is the static structure that defines the fixed power policy's callback
- * and name.
- */
-const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
-	"fixed",			/* name */
-	fixed_init,			/* init */
-	fixed_term,			/* term */
-	fixed_get_core_mask,		/* get_core_mask */
-	fixed_update_core_status,	/* update_core_status */
-	0u,				/* flags */
-	KBASE_PM_CA_POLICY_ID_FIXED,	/* id */
-};
-
-KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
deleted file mode 100644
index 68a2eac4..0000000
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-/*
- * A power policy implementing fixed core availability
- */
-
-#ifndef MALI_KBASE_PM_CA_FIXED_H
-#define MALI_KBASE_PM_CA_FIXED_H
-
-/**
- * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
- *
- * @dummy: Dummy member - no state is needed
- *
- * This contains data that is private to the particular power policy that is
- * active.
- */
-struct kbasep_pm_ca_policy_fixed {
-	int dummy;
-};
-
-extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
-
-#endif /* MALI_KBASE_PM_CA_FIXED_H */
-
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
index 602e175..e90c44d 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,22 +29,14 @@
 #include <mali_kbase.h>
 #include <mali_kbase_pm.h>
 
-static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+static bool coarse_demand_shaders_needed(struct kbase_device *kbdev)
 {
-	if (kbdev->pm.active_count == 0)
-		return 0;
-
-	return kbdev->gpu_props.props.raw_props.shader_present;
+	return kbase_pm_is_active(kbdev);
 }
 
 static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
 {
-	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
-			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
-			&& !kbdev->tiler_inuse_cnt)
-		return false;
-
-	return true;
+	return kbase_pm_is_active(kbdev);
 }
 
 static void coarse_demand_init(struct kbase_device *kbdev)
@@ -66,7 +58,7 @@
 	"coarse_demand",			/* name */
 	coarse_demand_init,			/* init */
 	coarse_demand_term,			/* term */
-	coarse_demand_get_core_mask,		/* get_core_mask */
+	coarse_demand_shaders_needed,		/* shaders_needed */
 	coarse_demand_get_core_active,		/* get_core_active */
 	0u,					/* flags */
 	KBASE_PM_POLICY_ID_COARSE_DEMAND,	/* id */
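
Both policy callbacks now reduce to predicates. Per the contract spelled out in mali_kbase_pm_defs.h later in this patch, shaders_needed must never return false while kbdev->shader_needed_cnt > 0, and get_core_active must meet or exceed kbase_pm_is_active(). A hedged sketch of the pair for a hypothetical demand-style policy (name and id wiring omitted; only fields that appear in this patch are used):

static bool sketch_shaders_needed(struct kbase_device *kbdev)
{
	/* Shader cores are wanted whenever queued jobs have asked for them. */
	return kbdev->shader_needed_cnt > 0;
}

static bool sketch_get_core_active(struct kbase_device *kbdev)
{
	/* Keep the GPU counted as active at least as long as
	 * kbase_pm_is_active() reports activity. */
	return kbase_pm_is_active(kbdev);
}
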
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
index f2b49eb..304e5d7 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -35,11 +35,11 @@
  * characteristics:
  * - When KBase indicates that the GPU will be powered up, but we don't yet
  *   know which Job Chains are to be run:
- *  - All Shader Cores are powered up, regardless of whether or not they will
- *    be needed later.
- * - When KBase indicates that a set of Shader Cores are needed to submit the
- *   currently queued Job Chains:
- *  - All Shader Cores are kept powered, regardless of whether or not they will
+ *  - Shader Cores are powered up, regardless of whether or not they will be
+ *    needed later.
+ * - When KBase indicates that Shader Cores are needed to submit the currently
+ *   queued Job Chains:
+ *  - Shader Cores are kept powered, regardless of whether or not they will
  *    be needed
  * - When KBase indicates that the GPU need not be powered:
  *  - The Shader Cores are powered off, and the GPU itself is powered off too.
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
index 6dddb07..7fe8eb3 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,12 +27,6 @@
 #ifndef _KBASE_PM_HWACCESS_DEFS_H_
 #define _KBASE_PM_HWACCESS_DEFS_H_
 
-#include "mali_kbase_pm_ca_fixed.h"
-#include "mali_kbase_pm_ca_devfreq.h"
-#if !MALI_CUSTOMER_RELEASE
-#include "mali_kbase_pm_ca_random.h"
-#endif
-
 #include "mali_kbase_pm_always_on.h"
 #include "mali_kbase_pm_coarse_demand.h"
 #include "mali_kbase_pm_demand.h"
@@ -71,58 +65,67 @@
 };
 
 /**
- * struct kbasep_pm_metrics_data - Metrics data collected for use by the power
- *                                 management framework.
+ * struct kbasep_pm_metrics - Metrics data collected for use by the power
+ *                            management framework.
  *
- *  @time_period_start: time at which busy/idle measurements started
  *  @time_busy: number of ns the GPU was busy executing jobs since the
  *          @time_period_start timestamp.
 *  @time_idle: number of ns the GPU was not executing jobs since the
 *          @time_period_start timestamp.
- *  @prev_busy: busy time in ns of previous time period.
- *           Updated when metrics are reset.
- *  @prev_idle: idle time in ns of previous time period
- *           Updated when metrics are reset.
- *  @gpu_active: true when the GPU is executing jobs. false when
- *           not. Updated when the job scheduler informs us a job in submitted
- *           or removed from a GPU slot.
  *  @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
  *           if two CL jobs were active for 400ns, this value would be updated
  *           with 800.
  *  @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
  *           if two GL jobs were active for 400ns, this value would be updated
  *           with 800.
+ */
+struct kbasep_pm_metrics {
+	u32 time_busy;
+	u32 time_idle;
+	u32 busy_cl[2];
+	u32 busy_gl;
+};
+
+/**
+ * struct kbasep_pm_metrics_state - State required to collect the metrics in
+ *                                  struct kbasep_pm_metrics
+ *  @time_period_start: time at which busy/idle measurements started
+ *  @gpu_active: true when the GPU is executing jobs. false when
+ *           not. Updated when the job scheduler informs us a job is submitted
+ *           or removed from a GPU slot.
  *  @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
  *  @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
  *           GL jobs never run on slot 2 this slot is not recorded.
+ *  @lock: spinlock protecting the kbasep_pm_metrics_state structure
+ *  @platform_data: pointer to data controlled by platform specific code
+ *  @kbdev: pointer to kbase device for which metrics are collected
+ *  @values: The current values of the power management metrics. The
+ *           kbase_pm_get_dvfs_metrics() function is used to compare these
+ *           current values with the saved values from a previous invocation.
  *  @timer: timer to regularly make DVFS decisions based on the power
  *           management metrics.
  *  @timer_active: boolean indicating @timer is running
- *  @platform_data: pointer to data controlled by platform specific code
- *  @kbdev: pointer to kbase device for which metrics are collected
- *
+ *  @dvfs_last: values of the PM metrics from the last DVFS tick
+ *  @dvfs_diff: difference between the current and previous PM metrics.
  */
-struct kbasep_pm_metrics_data {
+struct kbasep_pm_metrics_state {
 	ktime_t time_period_start;
-	u32 time_busy;
-	u32 time_idle;
-	u32 prev_busy;
-	u32 prev_idle;
 	bool gpu_active;
-	u32 busy_cl[2];
-	u32 busy_gl;
 	u32 active_cl_ctx[2];
 	u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
 	spinlock_t lock;
 
+	void *platform_data;
+	struct kbase_device *kbdev;
+
+	struct kbasep_pm_metrics values;
+
 #ifdef CONFIG_MALI_MIDGARD_DVFS
 	struct hrtimer timer;
 	bool timer_active;
+	struct kbasep_pm_metrics dvfs_last;
+	struct kbasep_pm_metrics dvfs_diff;
 #endif
-
-	void *platform_data;
-	struct kbase_device *kbdev;
 };
 
 union kbase_pm_policy_data {
@@ -135,25 +138,14 @@
 #endif
 };
 
-union kbase_pm_ca_policy_data {
-	struct kbasep_pm_ca_policy_fixed fixed;
-	struct kbasep_pm_ca_policy_devfreq devfreq;
-#if !MALI_CUSTOMER_RELEASE
-	struct kbasep_pm_ca_policy_random random;
-#endif
-};
-
 /**
  * struct kbase_pm_backend_data - Data stored per device for power management.
  *
  * This structure contains data for the power management framework. There is one
  * instance of this structure per device in the system.
  *
- * @ca_current_policy: The policy that is currently actively controlling core
- *                     availability.
  * @pm_current_policy: The policy that is currently actively controlling the
  *                     power state.
- * @ca_policy_data:    Private data for current CA policy
  * @pm_policy_data:    Private data for current PM policy
  * @ca_in_transition:  Flag indicating when core availability policy is
  *                     transitioning cores. The core availability policy must
@@ -243,20 +235,17 @@
  *                              &struct kbase_pm_callback_conf
  * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
  *                              &struct kbase_pm_callback_conf
+ * @ca_cores_enabled: Cores that are currently available
  *
  * Note:
- * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
- * policy is being changed with kbase_pm_ca_set_policy() or
- * kbase_pm_set_policy(). The change is protected under
- * kbase_device.pm.power_change_lock. Direct access to this
- * from IRQ context must therefore check for NULL. If NULL, then
- * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy
- * functions that would have been done under IRQ.
+ * During an IRQ, @pm_current_policy can be NULL when the policy is being
+ * changed with kbase_pm_set_policy(). The change is protected under
+ * kbase_device.pm.power_change_lock. Direct access to this from IRQ context
+ * must therefore check for NULL. If NULL, then kbase_pm_set_policy() will
+ * re-issue the policy functions that would have been done under IRQ.
  */
 struct kbase_pm_backend_data {
-	const struct kbase_pm_ca_policy *ca_current_policy;
 	const struct kbase_pm_policy *pm_current_policy;
-	union kbase_pm_ca_policy_data ca_policy_data;
 	union kbase_pm_policy_data pm_policy_data;
 	bool ca_in_transition;
 	bool reset_done;
@@ -291,7 +280,7 @@
 	spinlock_t gpu_powered_lock;
 
 
-	struct kbasep_pm_metrics_data metrics;
+	struct kbasep_pm_metrics_state metrics;
 
 	int gpu_poweroff_pending;
 	int shader_poweroff_pending_time;
@@ -322,6 +311,10 @@
 	int (*callback_power_runtime_on)(struct kbase_device *kbdev);
 	void (*callback_power_runtime_off)(struct kbase_device *kbdev);
 	int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
+
+#ifdef CONFIG_MALI_DEVFREQ
+	u64 ca_cores_enabled;
+#endif
 };
 
 
@@ -347,7 +340,7 @@
  * @name:               The name of this policy
  * @init:               Function called when the policy is selected
  * @term:               Function called when the policy is unselected
- * @get_core_mask:      Function called to get the current shader core mask
+ * @shaders_needed:     Function called to find out if shader cores are needed
  * @get_core_active:    Function called to get the current overall GPU power
  *                      state
  * @flags:              Field indicating flags for this policy
@@ -382,26 +375,28 @@
 	void (*term)(struct kbase_device *kbdev);
 
 	/**
-	 * Function called to get the current shader core mask
+	 * Function called to find out if shader cores are needed
 	 *
-	 * The returned mask should meet or exceed (kbdev->shader_needed_bitmap
-	 * | kbdev->shader_inuse_bitmap).
+	 * This needs to at least satisfy kbdev->shader_needed_cnt, and so must
+	 * never return false when kbdev->shader_needed_cnt > 0.
+	 *
+	 * Note that kbdev->pm.active_count being 0 is not a good indicator
+	 * that kbdev->shader_needed_cnt is also 0 - refer to the documentation
+	 * on the active_count member in struct kbase_pm_device_data and
+	 * kbase_pm_is_active().
 	 *
 	 * @kbdev: The kbase device structure for the device (must be a
 	 *         valid pointer)
 	 *
-	 * Return: The mask of shader cores to be powered
+	 * Return: true if shader cores are needed, false otherwise
 	 */
-	u64 (*get_core_mask)(struct kbase_device *kbdev);
+	bool (*shaders_needed)(struct kbase_device *kbdev);
 
 	/**
 	 * Function called to get the current overall GPU power state
 	 *
-	 * This function should consider the state of kbdev->pm.active_count. If
-	 * this count is greater than 0 then there is at least one active
-	 * context on the device and the GPU should be powered. If it is equal
-	 * to 0 then there are no active contexts and the GPU could be powered
-	 * off if desired.
+	 * This function must meet or exceed the requirements for power
+	 * indicated by kbase_pm_is_active().
 	 *
 	 * @kbdev: The kbase device structure for the device (must be a
 	 *         valid pointer)
@@ -414,111 +409,4 @@
 	enum kbase_pm_policy_id id;
 };
 
-
-enum kbase_pm_ca_policy_id {
-	KBASE_PM_CA_POLICY_ID_FIXED = 1,
-	KBASE_PM_CA_POLICY_ID_DEVFREQ,
-	KBASE_PM_CA_POLICY_ID_RANDOM
-};
-
-typedef u32 kbase_pm_ca_policy_flags;
-
-/**
- * Maximum length of a CA policy names
- */
-#define KBASE_PM_CA_MAX_POLICY_NAME_LEN 15
-
-/**
- * struct kbase_pm_ca_policy - Core availability policy structure.
- *
- * Each core availability policy exposes a (static) instance of this structure
- * which contains function pointers to the policy's methods.
- *
- * @name:               The name of this policy
- * @init:               Function called when the policy is selected
- * @term:               Function called when the policy is unselected
- * @get_core_mask:      Function called to get the current shader core
- *                      availability mask
- * @update_core_status: Function called to update the current core status
- * @flags:              Field indicating flags for this policy
- * @id:                 Field indicating an ID for this policy. This is not
- *                      necessarily the same as its index in the list returned
- *                      by kbase_pm_list_policies().
- *                      It is used purely for debugging.
- */
-struct kbase_pm_ca_policy {
-	char name[KBASE_PM_CA_MAX_POLICY_NAME_LEN + 1];
-
-	/**
-	 * Function called when the policy is selected
-	 *
-	 * This should initialize the kbdev->pm.ca_policy_data structure. It
-	 * should not attempt to make any changes to hardware state.
-	 *
-	 * It is undefined what state the cores are in when the function is
-	 * called.
-	 *
-	 * @kbdev The kbase device structure for the device (must be a
-	 *        valid pointer)
-	 */
-	void (*init)(struct kbase_device *kbdev);
-
-	/**
-	 * Function called when the policy is unselected.
-	 *
-	 * @kbdev The kbase device structure for the device (must be a
-	 *        valid pointer)
-	 */
-	void (*term)(struct kbase_device *kbdev);
-
-	/**
-	 * Function called to get the current shader core availability mask
-	 *
-	 * When a change in core availability is occurring, the policy must set
-	 * kbdev->pm.ca_in_transition to true. This is to indicate that
-	 * reporting changes in power state cannot be optimized out, even if
-	 * kbdev->pm.desired_shader_state remains unchanged. This must be done
-	 * by any functions internal to the Core Availability Policy that change
-	 * the return value of kbase_pm_ca_policy::get_core_mask.
-	 *
-	 * @kbdev The kbase device structure for the device (must be a
-	 *              valid pointer)
-	 *
-	 * Return: The current core availability mask
-	 */
-	u64 (*get_core_mask)(struct kbase_device *kbdev);
-
-	/**
-	 * Function called to update the current core status
-	 *
-	 * If none of the cores in core group 0 are ready or transitioning, then
-	 * the policy must ensure that the next call to get_core_mask does not
-	 * return 0 for all cores in core group 0. It is an error to disable
-	 * core group 0 through the core availability policy.
-	 *
-	 * When a change in core availability has finished, the policy must set
-	 * kbdev->pm.ca_in_transition to false. This is to indicate that
-	 * changes in power state can once again be optimized out when
-	 * kbdev->pm.desired_shader_state is unchanged.
-	 *
-	 * @kbdev:               The kbase device structure for the device
-	 *                       (must be a valid pointer)
-	 * @cores_ready:         The mask of cores currently powered and
-	 *                       ready to run jobs
-	 * @cores_transitioning: The mask of cores currently transitioning
-	 *                       power state
-	 */
-	void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
-						u64 cores_transitioning);
-
-	kbase_pm_ca_policy_flags flags;
-
-	/**
-	 * Field indicating an ID for this policy. This is not necessarily the
-	 * same as its index in the list returned by kbase_pm_list_policies().
-	 * It is used purely for debugging.
-	 */
-	enum kbase_pm_ca_policy_id id;
-};
-
 #endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
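
The metrics rework splits the raw counters (struct kbasep_pm_metrics) from the collection state (struct kbasep_pm_metrics_state), with dvfs_last and dvfs_diff only present under CONFIG_MALI_MIDGARD_DVFS. A hedged sketch of how the per-tick difference could be derived; the real helper (kbase_pm_get_dvfs_metrics(), per the kerneldoc above) lives in the metrics code and may differ in detail:

static void metrics_diff_sketch(struct kbasep_pm_metrics_state *state)
{
	struct kbasep_pm_metrics *cur = &state->values;
	struct kbasep_pm_metrics *last = &state->dvfs_last;
	struct kbasep_pm_metrics *diff = &state->dvfs_diff;

	/* Busy/idle deltas accumulated since the previous DVFS tick. */
	diff->time_busy = cur->time_busy - last->time_busy;
	diff->time_idle = cur->time_idle - last->time_idle;
	diff->busy_gl = cur->busy_gl - last->busy_gl;
	diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0];
	diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1];

	/* Start the next measurement window from the current counters. */
	*last = *cur;
}
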
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
index e0edddc..01727d6 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,24 +29,14 @@
 #include <mali_kbase.h>
 #include <mali_kbase_pm.h>
 
-static u64 demand_get_core_mask(struct kbase_device *kbdev)
+static bool demand_shaders_needed(struct kbase_device *kbdev)
 {
-	u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
-
-	if (0 == kbdev->pm.active_count)
-		return 0;
-
-	return desired;
+	return (kbdev->shader_needed_cnt > 0);
 }
 
 static bool demand_get_core_active(struct kbase_device *kbdev)
 {
-	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
-			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
-			&& !kbdev->tiler_inuse_cnt)
-		return false;
-
-	return true;
+	return kbase_pm_is_active(kbdev);
 }
 
 static void demand_init(struct kbase_device *kbdev)
@@ -69,7 +59,7 @@
 	"demand",			/* name */
 	demand_init,			/* init */
 	demand_term,			/* term */
-	demand_get_core_mask,		/* get_core_mask */
+	demand_shaders_needed,		/* shaders_needed */
 	demand_get_core_active,		/* get_core_active */
 	0u,				/* flags */
 	KBASE_PM_POLICY_ID_DEMAND,	/* id */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
index 5ee1824..4b05e6d 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -37,9 +37,9 @@
  *   know which Job Chains are to be run:
  *  - The Shader Cores are not powered up
  *
- * - When KBase indicates that a set of Shader Cores are needed to submit the
- *   currently queued Job Chains:
- *  - Only those Shader Cores are powered up
+ * - When KBase indicates that Shader Cores are needed to submit the currently
+ *   queued Job Chains:
+ *  - Shader Cores are powered up
  *
  * - When KBase indicates that the GPU need not be powered:
  *  - The Shader Cores are powered off, and the GPU itself is powered off too.
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
index 44803ab..cdd5cf7 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -135,19 +135,16 @@
 
 	kbase_reg_write(kbdev,
 			GPU_CONTROL_REG(GPU_COMMAND),
-			GPU_COMMAND_CLEAN_INV_CACHES,
-			NULL);
+			GPU_COMMAND_CLEAN_INV_CACHES);
 
 	raw = kbase_reg_read(kbdev,
-		GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
-		NULL);
+		GPU_CONTROL_REG(GPU_IRQ_RAWSTAT));
 
 	/* Wait for cache flush to complete before continuing, exit on
 	 * gpu resets or loop expiry. */
 	while (((raw & mask) == 0) && --loops) {
 		raw = kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
-					NULL);
+					GPU_CONTROL_REG(GPU_IRQ_RAWSTAT));
 	}
 }
 #endif
@@ -238,10 +235,10 @@
 	}
 
 	if (lo != 0)
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo);
 
 	if (hi != 0)
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi);
 }
 
 /**
@@ -269,24 +266,20 @@
 
 	KBASE_DEBUG_ASSERT(reg);
 
-	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
-	hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg));
+	hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4));
 
 	return (((u64) hi) << 32) | ((u64) lo);
 }
 
 void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev)
 {
-	kbdev->shader_inuse_bitmap = 0;
-	kbdev->shader_needed_bitmap = 0;
 	kbdev->shader_available_bitmap = 0;
 	kbdev->tiler_available_bitmap = 0;
 	kbdev->l2_users_count = 0;
 	kbdev->l2_available_bitmap = 0;
 	kbdev->tiler_needed_cnt = 0;
-	kbdev->tiler_inuse_cnt = 0;
-
-	memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+	kbdev->shader_needed_cnt = 0;
 }
 
 /**
@@ -438,19 +431,21 @@
 	present = kbase_pm_get_present_cores(kbdev, type);
 	trans = kbase_pm_get_trans_cores(kbdev, type);
 	ready = kbase_pm_get_ready_cores(kbdev, type);
+
 	/* mask off ready from trans in case transitions finished between the
 	 * register reads */
 	trans &= ~ready;
 
-	if (trans) /* Do not progress if any cores are transitioning */
-		return false;
-
 	powering_on_trans = trans & *powering_on;
-	*powering_on = powering_on_trans;
 
 	if (available != NULL)
 		*available = (ready | powering_on_trans) & desired_state;
 
+	if (trans) /* Do not progress if any cores are transitioning */
+		return false;
+
+	*powering_on = powering_on_trans;
+
 	/* Update desired state to include the in-use cores. These have to be
 	 * kept powered up because there are jobs running or about to run on
 	 * these cores
@@ -632,15 +627,6 @@
 		return false;
 	}
 
-	/* Trace that a change-state is being requested, and that it took
-	 * (effectively) no time to start it. This is useful for counting how
-	 * many state changes occurred, in a way that's backwards-compatible
-	 * with processing the trace data */
-	kbase_timeline_pm_send_event(kbdev,
-				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
-	kbase_timeline_pm_handle_event(kbdev,
-				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
-
 	/* If any cores are already powered then, we must keep the caches on */
 	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
 							KBASE_PM_CORE_SHADER);
@@ -689,9 +675,6 @@
 			&l2_available_bitmap,
 			&kbdev->pm.backend.powering_on_l2_state);
 
-	if (kbdev->l2_available_bitmap != l2_available_bitmap)
-		KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
-
 	kbdev->l2_available_bitmap = l2_available_bitmap;
 
 
@@ -713,27 +696,20 @@
 		in_desired_state &= kbase_pm_transition_core_type(kbdev,
 				KBASE_PM_CORE_SHADER,
 				kbdev->pm.backend.desired_shader_state,
-				kbdev->shader_inuse_bitmap,
-				&shader_available_bitmap,
+				0, &shader_available_bitmap,
 				&kbdev->pm.backend.powering_on_shader_state);
 
-		if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+		if (kbdev->shader_available_bitmap != shader_available_bitmap)
 			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
 						NULL, 0u,
 						(u32) shader_available_bitmap);
-			KBASE_TIMELINE_POWER_SHADER(kbdev,
-						shader_available_bitmap);
-		}
 
 		kbdev->shader_available_bitmap = shader_available_bitmap;
 
-		if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
 			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
 						NULL, NULL, 0u,
 						(u32) tiler_available_bitmap);
-			KBASE_TIMELINE_POWER_TILER(kbdev,
-							tiler_available_bitmap);
-		}
 
 		kbdev->tiler_available_bitmap = tiler_available_bitmap;
 
@@ -742,10 +718,6 @@
 			kbdev->gpu_props.props.raw_props.tiler_present) {
 		tiler_available_bitmap = 0;
 
-		if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
-			KBASE_TIMELINE_POWER_TILER(kbdev,
-							tiler_available_bitmap);
-
 		kbdev->tiler_available_bitmap = tiler_available_bitmap;
 	}
 
@@ -774,13 +746,6 @@
 		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
 				(u32)(kbdev->tiler_available_bitmap &
 				kbdev->pm.backend.desired_tiler_state));
-
-		/* Log timelining information about handling events that power
-		 * up cores, to match up either with immediate submission either
-		 * because cores already available, or from PM IRQ */
-		if (!in_desired_state)
-			kbase_timeline_pm_send_event(kbdev,
-				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
 	}
 
 	if (in_desired_state) {
@@ -830,9 +795,6 @@
 		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
 				(u32)kbdev->pm.backend.desired_tiler_state);
 
-		/* Log timelining information for synchronous waiters */
-		kbase_timeline_pm_send_event(kbdev,
-				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
 		/* Wake slow-path waiters. Job scheduler does not use this. */
 		KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
 
@@ -841,19 +803,8 @@
 
 	spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
 
-	/* kbase_pm_ca_update_core_status can cause one-level recursion into
-	 * this function, so it must only be called once all changes to kbdev
-	 * have been committed, and after the gpu_powered_lock has been
-	 * dropped. */
-	if (kbdev->shader_ready_bitmap != shader_ready_bitmap ||
-	    kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) {
-		kbdev->shader_ready_bitmap = shader_ready_bitmap;
-		kbdev->shader_transitioning_bitmap =
-						shader_transitioning_bitmap;
-
-		kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap,
-						shader_transitioning_bitmap);
-	}
+	kbdev->shader_ready_bitmap = shader_ready_bitmap;
+	kbdev->shader_transitioning_bitmap = shader_transitioning_bitmap;
 
 	/* The core availability policy is not allowed to keep core group 0
 	 * turned off (unless it was changing the l2 power state) */
@@ -916,46 +867,40 @@
 		dev_err(kbdev->dev, "Current state :\n");
 		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
 				kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(SHADER_READY_HI), NULL),
+					GPU_CONTROL_REG(SHADER_READY_HI)),
 				kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(SHADER_READY_LO),
-					NULL));
+					GPU_CONTROL_REG(SHADER_READY_LO)));
 		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
 				kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(TILER_READY_HI), NULL),
+					GPU_CONTROL_REG(TILER_READY_HI)),
 				kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(TILER_READY_LO), NULL));
+					GPU_CONTROL_REG(TILER_READY_LO)));
 		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
 				kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(L2_READY_HI), NULL),
+					GPU_CONTROL_REG(L2_READY_HI)),
 				kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(L2_READY_LO), NULL));
+					GPU_CONTROL_REG(L2_READY_LO)));
 		dev_err(kbdev->dev, "Cores transitioning :\n");
 		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
 				kbase_reg_read(kbdev, GPU_CONTROL_REG(
-						SHADER_PWRTRANS_HI), NULL),
+						SHADER_PWRTRANS_HI)),
 				kbase_reg_read(kbdev, GPU_CONTROL_REG(
-						SHADER_PWRTRANS_LO), NULL));
+						SHADER_PWRTRANS_LO)));
 		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
 				kbase_reg_read(kbdev, GPU_CONTROL_REG(
-						TILER_PWRTRANS_HI), NULL),
+						TILER_PWRTRANS_HI)),
 				kbase_reg_read(kbdev, GPU_CONTROL_REG(
-						TILER_PWRTRANS_LO), NULL));
+						TILER_PWRTRANS_LO)));
 		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
 				kbase_reg_read(kbdev, GPU_CONTROL_REG(
-						L2_PWRTRANS_HI), NULL),
+						L2_PWRTRANS_HI)),
 				kbase_reg_read(kbdev, GPU_CONTROL_REG(
-						L2_PWRTRANS_LO), NULL));
+						L2_PWRTRANS_LO)));
 #if KBASE_GPU_RESET_EN
 		dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
 		if (kbase_prepare_to_reset_gpu(kbdev))
 			kbase_reset_gpu(kbdev);
 #endif /* KBASE_GPU_RESET_EN */
-	} else {
-		/* Log timelining information that a change in state has
-		 * completed */
-		kbase_timeline_pm_handle_event(kbdev,
-				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
 	}
 }
 KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
@@ -970,18 +915,15 @@
 	 * and unmask them all.
 	 */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
-									NULL);
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
-									NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
-	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
-									NULL);
-	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF);
 
-	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
-	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF);
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
@@ -995,15 +937,13 @@
 	 */
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
-									NULL);
-	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
-	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
-									NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF);
 
-	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
-	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF);
 }
 
 void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
@@ -1027,11 +967,10 @@
 void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
 {
 	bool reset_required = is_resume;
-	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
 	unsigned long flags;
 
 	KBASE_DEBUG_ASSERT(NULL != kbdev);
-	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
 	lockdep_assert_held(&kbdev->pm.lock);
 
 	if (kbdev->pm.backend.gpu_powered) {
@@ -1219,10 +1158,10 @@
 
 	if (!kbdev->hw_quirks_sc)
 		kbdev->hw_quirks_sc = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(SHADER_CONFIG), NULL);
+				GPU_CONTROL_REG(SHADER_CONFIG));
 
 	kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
-			GPU_CONTROL_REG(TILER_CONFIG), NULL);
+			GPU_CONTROL_REG(TILER_CONFIG));
 
 	/* Set tiler clock gate override if required */
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
@@ -1230,7 +1169,7 @@
 
 	/* Limit the GPU bus bandwidth if the platform needs this. */
 	kbdev->hw_quirks_mmu = kbase_reg_read(kbdev,
-			GPU_CONTROL_REG(L2_MMU_CONFIG), NULL);
+			GPU_CONTROL_REG(L2_MMU_CONFIG));
 
 
 	/* Limit read & write ID width for AXI */
@@ -1297,7 +1236,7 @@
 		u32 coherency_features;
 
 		coherency_features = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+				GPU_CONTROL_REG(COHERENCY_FEATURES));
 
 		/* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
 		 * documented for tMIx so force correct value here.
@@ -1315,7 +1254,7 @@
 
 	if (!kbdev->hw_quirks_jm)
 		kbdev->hw_quirks_jm = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(JM_CONFIG), NULL);
+				GPU_CONTROL_REG(JM_CONFIG));
 
 #ifdef CONFIG_MALI_CORESTACK
 #define MANUAL_POWER_CONTROL ((u32)(1 << 8))
@@ -1326,16 +1265,16 @@
 static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
 {
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG),
-			kbdev->hw_quirks_sc, NULL);
+			kbdev->hw_quirks_sc);
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG),
-			kbdev->hw_quirks_tiler, NULL);
+			kbdev->hw_quirks_tiler);
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
-			kbdev->hw_quirks_mmu, NULL);
+			kbdev->hw_quirks_mmu);
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
-			kbdev->hw_quirks_jm, NULL);
+			kbdev->hw_quirks_jm);
 
 }
 
@@ -1375,11 +1314,10 @@
 	KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev);
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-						GPU_COMMAND_SOFT_RESET, NULL);
+						GPU_COMMAND_SOFT_RESET);
 
 	/* Unmask the reset complete interrupt only */
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED,
-									NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED);
 
 	/* Initialize a structure for tracking the status of the reset */
 	rtdata.kbdev = kbdev;
@@ -1404,7 +1342,7 @@
 
 	/* No interrupt has been received - check if the RAWSTAT register says
 	 * the reset has completed */
-	if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+	if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) &
 							RESET_COMPLETED) {
 		/* The interrupt is set in the RAWSTAT; this suggests that the
 		 * interrupts are not getting to the CPU */
@@ -1420,7 +1358,7 @@
 								RESET_TIMEOUT);
 	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-						GPU_COMMAND_HARD_RESET, NULL);
+						GPU_COMMAND_HARD_RESET);
 
 	/* Restart the timer to wait for the hard reset to complete */
 	rtdata.timed_out = 0;
@@ -1451,7 +1389,7 @@
 	struct kbase_device *kbdev = pdev->data;
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-		GPU_COMMAND_SET_PROTECTED_MODE, NULL);
+		GPU_COMMAND_SET_PROTECTED_MODE);
 	return 0;
 }
 
@@ -1522,7 +1460,6 @@
 	if (kbdev->protected_mode)
 		resume_vinstr = true;
 	kbdev->protected_mode = false;
-	kbase_ipa_model_use_configured_locked(kbdev);
 
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
 
@@ -1538,7 +1475,7 @@
 	/* Sanity check protected mode was left after reset */
 	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
 		u32 gpu_status = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(GPU_STATUS), NULL);
+				GPU_CONTROL_REG(GPU_STATUS));
 
 		WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE);
 	}
@@ -1557,7 +1494,7 @@
 								irq_flags);
 		if (kbdev->pm.backend.gpu_cycle_counter_requests)
 			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+					GPU_COMMAND_CYCLE_COUNT_START);
 		spin_unlock_irqrestore(
 			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
 								irq_flags);
@@ -1608,7 +1545,7 @@
 
 	if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
 		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+					GPU_COMMAND_CYCLE_COUNT_START);
 
 	spin_unlock_irqrestore(
 			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
@@ -1664,7 +1601,7 @@
 
 	if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
 		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-					GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+					GPU_COMMAND_CYCLE_COUNT_STOP);
 
 	spin_unlock_irqrestore(
 			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
index 831971b..0d3599a 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -507,9 +507,9 @@
 void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
 
 #if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
-void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
-		unsigned long *total, unsigned long *busy);
-void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev);
+void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
+			       struct kbasep_pm_metrics *last,
+			       struct kbasep_pm_metrics *diff);
 #endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */
 
 #ifdef CONFIG_MALI_MIDGARD_DVFS
@@ -565,4 +565,16 @@
  */
 void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
 
+#ifdef CONFIG_MALI_DEVFREQ
+/**
+ * kbase_devfreq_set_core_mask - Set devfreq core mask
+ * @kbdev:     Device pointer
+ * @core_mask: New core mask
+ *
+ * This function is used by devfreq to change the available core mask as
+ * required by Dynamic Core Scaling.
+ */
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask);
+#endif
+
 #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
index a8020b6..6b9b686 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,6 +30,7 @@
 #include <mali_kbase_pm.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 #include <backend/gpu/mali_kbase_jm_rb.h>
+#include <backend/gpu/mali_kbase_pm_defs.h>
 
 /* When VSync is being hit aim for utilisation between 70-90% */
 #define KBASE_PM_VSYNC_MIN_UTILISATION          70
@@ -43,19 +44,15 @@
  * under 11s. Exceeding this will cause overflow */
 #define KBASE_PM_TIME_SHIFT			8
 
-/* Maximum time between sampling of utilization data, without resetting the
- * counters. */
-#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */
-
 #ifdef CONFIG_MALI_MIDGARD_DVFS
 static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
 {
 	unsigned long flags;
-	struct kbasep_pm_metrics_data *metrics;
+	struct kbasep_pm_metrics_state *metrics;
 
 	KBASE_DEBUG_ASSERT(timer != NULL);
 
-	metrics = container_of(timer, struct kbasep_pm_metrics_data, timer);
+	metrics = container_of(timer, struct kbasep_pm_metrics_state, timer);
 	kbase_pm_get_dvfs_action(metrics->kbdev);
 
 	spin_lock_irqsave(&metrics->lock, flags);
@@ -78,18 +75,17 @@
 	kbdev->pm.backend.metrics.kbdev = kbdev;
 
 	kbdev->pm.backend.metrics.time_period_start = ktime_get();
-	kbdev->pm.backend.metrics.time_busy = 0;
-	kbdev->pm.backend.metrics.time_idle = 0;
-	kbdev->pm.backend.metrics.prev_busy = 0;
-	kbdev->pm.backend.metrics.prev_idle = 0;
 	kbdev->pm.backend.metrics.gpu_active = false;
 	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
 	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
 	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
 	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
-	kbdev->pm.backend.metrics.busy_cl[0] = 0;
-	kbdev->pm.backend.metrics.busy_cl[1] = 0;
-	kbdev->pm.backend.metrics.busy_gl = 0;
+
+	kbdev->pm.backend.metrics.values.time_busy = 0;
+	kbdev->pm.backend.metrics.values.time_idle = 0;
+	kbdev->pm.backend.metrics.values.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.values.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.values.busy_gl = 0;
 
 	spin_lock_init(&kbdev->pm.backend.metrics.lock);
 
@@ -143,17 +139,17 @@
 	if (kbdev->pm.backend.metrics.gpu_active) {
 		u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
 
-		kbdev->pm.backend.metrics.time_busy += ns_time;
+		kbdev->pm.backend.metrics.values.time_busy += ns_time;
 		if (kbdev->pm.backend.metrics.active_cl_ctx[0])
-			kbdev->pm.backend.metrics.busy_cl[0] += ns_time;
+			kbdev->pm.backend.metrics.values.busy_cl[0] += ns_time;
 		if (kbdev->pm.backend.metrics.active_cl_ctx[1])
-			kbdev->pm.backend.metrics.busy_cl[1] += ns_time;
+			kbdev->pm.backend.metrics.values.busy_cl[1] += ns_time;
 		if (kbdev->pm.backend.metrics.active_gl_ctx[0])
-			kbdev->pm.backend.metrics.busy_gl += ns_time;
+			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
 		if (kbdev->pm.backend.metrics.active_gl_ctx[1])
-			kbdev->pm.backend.metrics.busy_gl += ns_time;
+			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
 	} else {
-		kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff)
+		kbdev->pm.backend.metrics.values.time_idle += (u32) (ktime_to_ns(diff)
 							>> KBASE_PM_TIME_SHIFT);
 	}
 
@@ -161,160 +157,53 @@
 }
 
 #if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
-/* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this
- * function.
- */
-static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev,
-								ktime_t now)
+void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
+			       struct kbasep_pm_metrics *last,
+			       struct kbasep_pm_metrics *diff)
 {
-	/* Store previous value */
-	kbdev->pm.backend.metrics.prev_idle =
-					kbdev->pm.backend.metrics.time_idle;
-	kbdev->pm.backend.metrics.prev_busy =
-					kbdev->pm.backend.metrics.time_busy;
-
-	/* Reset current values */
-	kbdev->pm.backend.metrics.time_period_start = now;
-	kbdev->pm.backend.metrics.time_idle = 0;
-	kbdev->pm.backend.metrics.time_busy = 0;
-	kbdev->pm.backend.metrics.busy_cl[0] = 0;
-	kbdev->pm.backend.metrics.busy_cl[1] = 0;
-	kbdev->pm.backend.metrics.busy_gl = 0;
-}
-
-void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev)
-{
+	struct kbasep_pm_metrics *cur = &kbdev->pm.backend.metrics.values;
 	unsigned long flags;
 
 	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
-	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get());
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get());
+
+	memset(diff, 0, sizeof(*diff));
+	diff->time_busy = cur->time_busy - last->time_busy;
+	diff->time_idle = cur->time_idle - last->time_idle;
+	diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0];
+	diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1];
+	diff->busy_gl = cur->busy_gl - last->busy_gl;
+
+	*last = *cur;
+
 	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
 }
-
-void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
-		unsigned long *total_out, unsigned long *busy_out)
-{
-	ktime_t now = ktime_get();
-	unsigned long flags, busy, total;
-
-	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
-	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
-
-	busy = kbdev->pm.backend.metrics.time_busy;
-	total = busy + kbdev->pm.backend.metrics.time_idle;
-
-	/* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default
-	 * 100ms) */
-	if (total >= MALI_UTILIZATION_MAX_PERIOD) {
-		kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
-	} else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
-		total += kbdev->pm.backend.metrics.prev_idle +
-				kbdev->pm.backend.metrics.prev_busy;
-		busy += kbdev->pm.backend.metrics.prev_busy;
-	}
-
-	*total_out = total;
-	*busy_out = busy;
-	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
-}
+KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics);
 #endif
 
 #ifdef CONFIG_MALI_MIDGARD_DVFS
-
-/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
- * function
- */
-int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev,
-					int *util_gl_share,
-					int util_cl_share[2],
-					ktime_t now)
-{
-	int utilisation;
-	int busy;
-
-	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
-
-	if (kbdev->pm.backend.metrics.time_idle +
-				kbdev->pm.backend.metrics.time_busy == 0) {
-		/* No data - so we return NOP */
-		utilisation = -1;
-		if (util_gl_share)
-			*util_gl_share = -1;
-		if (util_cl_share) {
-			util_cl_share[0] = -1;
-			util_cl_share[1] = -1;
-		}
-		goto out;
-	}
-
-	utilisation = (100 * kbdev->pm.backend.metrics.time_busy) /
-			(kbdev->pm.backend.metrics.time_idle +
-			 kbdev->pm.backend.metrics.time_busy);
-
-	busy = kbdev->pm.backend.metrics.busy_gl +
-		kbdev->pm.backend.metrics.busy_cl[0] +
-		kbdev->pm.backend.metrics.busy_cl[1];
-
-	if (busy != 0) {
-		if (util_gl_share)
-			*util_gl_share =
-				(100 * kbdev->pm.backend.metrics.busy_gl) /
-									busy;
-		if (util_cl_share) {
-			util_cl_share[0] =
-				(100 * kbdev->pm.backend.metrics.busy_cl[0]) /
-									busy;
-			util_cl_share[1] =
-				(100 * kbdev->pm.backend.metrics.busy_cl[1]) /
-									busy;
-		}
-	} else {
-		if (util_gl_share)
-			*util_gl_share = -1;
-		if (util_cl_share) {
-			util_cl_share[0] = -1;
-			util_cl_share[1] = -1;
-		}
-	}
-
-out:
-	return utilisation;
-}
-
 void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
 {
-	unsigned long flags;
 	int utilisation, util_gl_share;
 	int util_cl_share[2];
-	ktime_t now;
+	int busy;
+	struct kbasep_pm_metrics *diff;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 
-	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	diff = &kbdev->pm.backend.metrics.dvfs_diff;
 
-	now = ktime_get();
+	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff);
 
-	utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share,
-			util_cl_share, now);
+	utilisation = (100 * diff->time_busy) /
+			max(diff->time_busy + diff->time_idle, 1u);
 
-	if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 ||
-							util_cl_share[1] < 0) {
-		utilisation = 0;
-		util_gl_share = 0;
-		util_cl_share[0] = 0;
-		util_cl_share[1] = 0;
-		goto out;
-	}
+	busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u);
+	util_gl_share = (100 * diff->busy_gl) / busy;
+	util_cl_share[0] = (100 * diff->busy_cl[0]) / busy;
+	util_cl_share[1] = (100 * diff->busy_cl[1]) / busy;
 
-out:
-#ifdef CONFIG_MALI_MIDGARD_DVFS
-	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share,
-								util_cl_share);
-#endif				/*CONFIG_MALI_MIDGARD_DVFS */
-
-	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
-
-	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share);
 }
 
 bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
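Note: the metrics rework above replaces the old scheme, in which the busy/idle counters were reset on every sample and the previous period stitched back in, with monotonically accumulating counters: each consumer keeps its own last-seen snapshot and kbase_pm_get_dvfs_metrics() returns the delta since that snapshot. The following is a minimal, self-contained sketch of that snapshot-diff pattern; it is not driver code, and every name in it is invented for illustration.

/*
 * Standalone model of the snapshot-diff scheme: counters only accumulate,
 * each consumer keeps its own "last" snapshot, and utilisation is computed
 * from the delta. Names are illustrative only.
 */
#include <stdio.h>

struct pm_metrics {
	unsigned int time_busy;
	unsigned int time_idle;
};

/* Monotonically accumulating counters, analogous to metrics.values */
static struct pm_metrics cur;

static void gpu_accumulate(unsigned int busy, unsigned int idle)
{
	cur.time_busy += busy;
	cur.time_idle += idle;
}

/* Analogous to kbase_pm_get_dvfs_metrics(): diff against the caller's
 * snapshot, then update the snapshot. */
static void get_metrics(struct pm_metrics *last, struct pm_metrics *diff)
{
	diff->time_busy = cur.time_busy - last->time_busy;
	diff->time_idle = cur.time_idle - last->time_idle;
	*last = cur;
}

int main(void)
{
	struct pm_metrics last = { 0, 0 }, diff;
	unsigned int total, util;

	gpu_accumulate(70, 30);
	get_metrics(&last, &diff);
	/* Guard the division as kbase_pm_get_dvfs_action() does with max(,1) */
	total = diff.time_busy + diff.time_idle;
	util = (100 * diff.time_busy) / (total ? total : 1);
	printf("utilisation = %u%%\n", util);	/* 70% */

	gpu_accumulate(10, 90);
	get_metrics(&last, &diff);
	total = diff.time_busy + diff.time_idle;
	util = (100 * diff.time_busy) / (total ? total : 1);
	printf("utilisation = %u%%\n", util);	/* 10% */

	return 0;
}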
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
index f1e73a1c..6dd00a9 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -89,72 +89,6 @@
 };
 typedef u32 kbase_pm_change_state;
 
-
-#ifdef CONFIG_MALI_TRACE_TIMELINE
-/* Timeline Trace code lookups for each function */
-static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
-					[KBASE_PM_CHANGE_STATE_COUNT] = {
-	/* kbase_pm_request_cores */
-	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
-	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
-		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
-	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
-		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
-	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
-						KBASE_PM_CHANGE_STATE_TILER] =
-		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
-
-	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
-	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
-		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
-	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
-		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
-	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
-						KBASE_PM_CHANGE_STATE_TILER] =
-		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
-
-	/* kbase_pm_release_cores */
-	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
-	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
-		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
-	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
-		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
-	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
-						KBASE_PM_CHANGE_STATE_TILER] =
-		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
-
-	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
-	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
-		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
-	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
-		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
-	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
-						KBASE_PM_CHANGE_STATE_TILER] =
-		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
-};
-
-static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
-		enum kbase_pm_func_id func_id,
-		kbase_pm_change_state state)
-{
-	int trace_code;
-
-	KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
-	KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
-									state);
-
-	trace_code = kbase_pm_change_state_trace_code[func_id][state];
-	KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
-}
-
-#else /* CONFIG_MALI_TRACE_TIMELINE */
-static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
-		enum kbase_pm_func_id func_id, kbase_pm_change_state state)
-{
-}
-
-#endif /* CONFIG_MALI_TRACE_TIMELINE */
-
 /**
  * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
  *                               requested shader cores
@@ -181,11 +115,7 @@
 			kbdev->pm.backend.ca_in_transition) {
 		bool cores_are_available;
 
-		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
-			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
 		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
-		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
-			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
 
 		/* Don't need 'cores_are_available',
 		 * because we don't return anything */
@@ -356,6 +286,9 @@
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 	active = backend->pm_current_policy->get_core_active(kbdev);
+	WARN((kbase_pm_is_active(kbdev) && !active),
+		"GPU is active but policy '%s' is indicating that it can be powered off",
+		kbdev->pm.backend.pm_current_policy->name);
 
 	if (active) {
 		if (backend->gpu_poweroff_pending) {
@@ -383,6 +316,7 @@
 
 		/* Power on the GPU and any cores requested by the policy */
 		if (pm->backend.poweroff_wait_in_progress) {
+			KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
 			pm->backend.poweron_required = true;
 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		} else {
@@ -435,6 +369,42 @@
 	}
 }
 
+/**
+ * get_desired_shader_bitmap - Get the desired shader bitmap, based on the
+ *                             current power policy
+ *
+ * @kbdev: The kbase device structure for the device
+ *
+ * Queries the current power policy to determine if shader cores will be
+ * required in the current state, and applies any HW workarounds.
+ *
+ * Return: bitmap of desired shader cores
+ */
+
+static u64 get_desired_shader_bitmap(struct kbase_device *kbdev)
+{
+	u64 desired_bitmap = 0u;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev))
+		desired_bitmap = kbase_pm_ca_get_core_mask(kbdev);
+
+	WARN(!desired_bitmap && kbdev->shader_needed_cnt,
+			"Shader cores are needed but policy '%s' did not make them needed",
+			kbdev->pm.backend.pm_current_policy->name);
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
+		/* Unless XAFFINITY is supported, enable core 0 if tiler
+		 * required, regardless of core availability
+		 */
+		if (kbdev->tiler_needed_cnt > 0)
+			desired_bitmap |= 1;
+	}
+
+	return desired_bitmap;
+}
+
 void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
 {
 	u64 desired_bitmap;
@@ -449,30 +419,19 @@
 	if (kbdev->pm.backend.poweroff_wait_in_progress)
 		return;
 
-	if (kbdev->protected_mode_transition &&	!kbdev->shader_needed_bitmap &&
-			!kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt
-			&& !kbdev->tiler_inuse_cnt) {
+	if (kbdev->protected_mode_transition && !kbdev->shader_needed_cnt &&
+			!kbdev->tiler_needed_cnt) {
 		/* We are trying to change in/out of protected mode - force all
 		 * cores off so that the L2 powers down */
 		desired_bitmap = 0;
 		desired_tiler_bitmap = 0;
 	} else {
-		desired_bitmap =
-		kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
-		desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+		desired_bitmap = get_desired_shader_bitmap(kbdev);
 
-		if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+		if (kbdev->tiler_needed_cnt > 0)
 			desired_tiler_bitmap = 1;
 		else
 			desired_tiler_bitmap = 0;
-
-		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
-			/* Unless XAFFINITY is supported, enable core 0 if tiler
-			 * required, regardless of core availability */
-			if (kbdev->tiler_needed_cnt > 0 ||
-					kbdev->tiler_inuse_cnt > 0)
-				desired_bitmap |= 1;
-		}
 	}
 
 	if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
@@ -649,55 +608,28 @@
 
 KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
 
-/* Check whether a state change has finished, and trace it as completed */
-static void
-kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
-{
-	if ((kbdev->shader_available_bitmap &
-					kbdev->pm.backend.desired_shader_state)
-				== kbdev->pm.backend.desired_shader_state &&
-		(kbdev->tiler_available_bitmap &
-					kbdev->pm.backend.desired_tiler_state)
-				== kbdev->pm.backend.desired_tiler_state)
-		kbase_timeline_pm_check_handle_event(kbdev,
-				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
-}
-
 void kbase_pm_request_cores(struct kbase_device *kbdev,
-				bool tiler_required, u64 shader_cores)
+				bool tiler_required, bool shader_required)
 {
-	u64 cores;
-
 	kbase_pm_change_state change_gpu_state = 0u;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	cores = shader_cores;
-	while (cores) {
-		int bitnum = fls64(cores) - 1;
-		u64 bit = 1ULL << bitnum;
+	if (shader_required) {
+		int cnt = ++kbdev->shader_needed_cnt;
 
-		/* It should be almost impossible for this to overflow. It would
-		 * require 2^32 atoms to request a particular core, which would
-		 * require 2^24 contexts to submit. This would require an amount
-		 * of memory that is impossible on a 32-bit system and extremely
-		 * unlikely on a 64-bit system. */
-		int cnt = ++kbdev->shader_needed_cnt[bitnum];
-
-		if (1 == cnt) {
-			kbdev->shader_needed_bitmap |= bit;
+		if (cnt == 1)
 			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
-		}
 
-		cores &= ~bit;
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt != 0);
 	}
 
 	if (tiler_required) {
 		int cnt = ++kbdev->tiler_needed_cnt;
 
-		if (1 == cnt)
+		if (cnt == 1)
 			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
 
 		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
@@ -705,22 +637,18 @@
 
 	if (change_gpu_state) {
 		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
-				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+				NULL, 0u, kbdev->shader_needed_cnt);
+		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_TILER_NEEDED, NULL,
+				NULL, 0u, kbdev->tiler_needed_cnt);
 
-		kbase_timeline_pm_cores_func(kbdev,
-					KBASE_PM_FUNC_ID_REQUEST_CORES_START,
-							change_gpu_state);
 		kbase_pm_update_cores_state_nolock(kbdev);
-		kbase_timeline_pm_cores_func(kbdev,
-					KBASE_PM_FUNC_ID_REQUEST_CORES_END,
-							change_gpu_state);
 	}
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
 
-void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
-				bool tiler_required, u64 shader_cores)
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, bool shader_required)
 {
 	kbase_pm_change_state change_gpu_state = 0u;
 
@@ -728,22 +656,16 @@
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	while (shader_cores) {
-		int bitnum = fls64(shader_cores) - 1;
-		u64 bit = 1ULL << bitnum;
+	if (shader_required) {
 		int cnt;
 
-		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt > 0);
 
-		cnt = --kbdev->shader_needed_cnt[bitnum];
+		cnt = --kbdev->shader_needed_cnt;
 
 		if (0 == cnt) {
-			kbdev->shader_needed_bitmap &= ~bit;
-
 			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
 		}
-
-		shader_cores &= ~bit;
 	}
 
 	if (tiler_required) {
@@ -758,165 +680,26 @@
 	}
 
 	if (change_gpu_state) {
-		KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
-				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, kbdev->shader_needed_cnt);
+		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_TILER_NEEDED, NULL,
+				NULL, 0u, kbdev->tiler_needed_cnt);
 
 		kbase_pm_update_cores_state_nolock(kbdev);
-
-		/* Trace that any state change effectively completes immediately
-		 * - no-one will wait on the state change */
-		kbase_pm_trace_check_and_finish_state_change(kbdev);
-	}
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
-
-enum kbase_pm_cores_ready
-kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
-				bool tiler_required, u64 shader_cores)
-{
-	u64 prev_shader_needed;	/* Just for tracing */
-	u64 prev_shader_inuse;	/* Just for tracing */
-
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	prev_shader_needed = kbdev->shader_needed_bitmap;
-	prev_shader_inuse = kbdev->shader_inuse_bitmap;
-
-	/* If desired_shader_state does not contain the requested cores, then
-	 * power management is not attempting to powering those cores (most
-	 * likely due to core availability policy) and a new job affinity must
-	 * be chosen */
-	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
-							shader_cores) {
-		return (kbdev->pm.backend.poweroff_wait_in_progress ||
-				kbdev->pm.backend.pm_current_policy == NULL) ?
-				KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
-	}
-
-	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
-	    (tiler_required && !kbdev->tiler_available_bitmap)) {
-		/* Trace ongoing core transition */
-		kbase_timeline_pm_l2_transition_start(kbdev);
-		return KBASE_CORES_NOT_READY;
-	}
-
-	/* If we started to trace a state change, then trace it has being
-	 * finished by now, at the very latest */
-	kbase_pm_trace_check_and_finish_state_change(kbdev);
-	/* Trace core transition done */
-	kbase_timeline_pm_l2_transition_done(kbdev);
-
-	while (shader_cores) {
-		int bitnum = fls64(shader_cores) - 1;
-		u64 bit = 1ULL << bitnum;
-		int cnt;
-
-		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
-
-		cnt = --kbdev->shader_needed_cnt[bitnum];
-
-		if (0 == cnt)
-			kbdev->shader_needed_bitmap &= ~bit;
-
-		/* shader_inuse_cnt should not overflow because there can only
-		 * be a very limited number of jobs on the h/w at one time */
-
-		kbdev->shader_inuse_cnt[bitnum]++;
-		kbdev->shader_inuse_bitmap |= bit;
-
-		shader_cores &= ~bit;
-	}
-
-	if (tiler_required) {
-		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
-
-		--kbdev->tiler_needed_cnt;
-
-		kbdev->tiler_inuse_cnt++;
-
-		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
-	}
-
-	if (prev_shader_needed != kbdev->shader_needed_bitmap)
-		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
-				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
-
-	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
-		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
-				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
-
-	return KBASE_CORES_READY;
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
-
-void kbase_pm_release_cores(struct kbase_device *kbdev,
-				bool tiler_required, u64 shader_cores)
-{
-	kbase_pm_change_state change_gpu_state = 0u;
-
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
-
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	while (shader_cores) {
-		int bitnum = fls64(shader_cores) - 1;
-		u64 bit = 1ULL << bitnum;
-		int cnt;
-
-		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
-
-		cnt = --kbdev->shader_inuse_cnt[bitnum];
-
-		if (0 == cnt) {
-			kbdev->shader_inuse_bitmap &= ~bit;
-			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
-		}
-
-		shader_cores &= ~bit;
-	}
-
-	if (tiler_required) {
-		int cnt;
-
-		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
-
-		cnt = --kbdev->tiler_inuse_cnt;
-
-		if (0 == cnt)
-			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
-	}
-
-	if (change_gpu_state) {
-		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
-				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
-
-		kbase_timeline_pm_cores_func(kbdev,
-					KBASE_PM_FUNC_ID_RELEASE_CORES_START,
-							change_gpu_state);
-		kbase_pm_update_cores_state_nolock(kbdev);
-		kbase_timeline_pm_cores_func(kbdev,
-					KBASE_PM_FUNC_ID_RELEASE_CORES_END,
-							change_gpu_state);
-
-		/* Trace that any state change completed immediately */
-		kbase_pm_trace_check_and_finish_state_change(kbdev);
 	}
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
 
 void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
-					bool tiler_required,
-					u64 shader_cores)
+		bool tiler_required, bool shader_required)
 {
 	unsigned long flags;
 
 	kbase_pm_wait_for_poweroff_complete(kbdev);
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+	kbase_pm_request_cores(kbdev, tiler_required, shader_required);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	kbase_pm_check_transitions_sync(kbdev);
@@ -924,33 +707,52 @@
 
 KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
 
-void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+static void kbase_pm_l2_caches_ref(struct kbase_device *kbdev)
 {
-	unsigned long flags;
-	u32 prior_l2_users_count;
+	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-	prior_l2_users_count = kbdev->l2_users_count++;
+	kbdev->l2_users_count++;
 
 	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
 
-	/* if the GPU is reset while the l2 is on, l2 will be off but
-	 * prior_l2_users_count will be > 0. l2_available_bitmap will have been
-	 * set to 0 though by kbase_pm_init_hw */
-	if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
-		kbase_pm_check_transitions_nolock(kbdev);
+	/* Check for the required L2 transitions.
+	 * The caller will block here until the L2 caches of all core groups
+	 * are powered on, so the HW must be told to power up all the L2
+	 * caches. A previously non-zero l2_users_count cannot be used to skip
+	 * this check, as the count can be non-zero even when not all
+	 * instances of the L2 cache are powered up, since the power status of
+	 * the L2 is not currently tracked separately for each core group.
+	 * Also, if the GPU is reset while the L2 is on, the L2 will be off
+	 * but the count will be non-zero.
+	 */
+	kbase_pm_check_transitions_nolock(kbdev);
+}
+
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Take the reference on l2_users_count and check core transitions.
+	 */
+	kbase_pm_l2_caches_ref(kbdev);
 
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
 	wait_event(kbdev->pm.backend.l2_powered_wait,
 					kbdev->pm.backend.l2_powered == 1);
-
-	/* Trace that any state change completed immediately */
-	kbase_pm_trace_check_and_finish_state_change(kbdev);
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
 
+void kbase_pm_request_l2_caches_nolock(struct kbase_device *kbdev)
+{
+	/* Take the reference on l2_users_count and check core transitions.
+	 */
+	kbase_pm_l2_caches_ref(kbdev);
+}
+
 void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
 {
 	lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -968,11 +770,8 @@
 
 	--kbdev->l2_users_count;
 
-	if (!kbdev->l2_users_count) {
+	if (!kbdev->l2_users_count)
 		kbase_pm_check_transitions_nolock(kbdev);
-		/* Trace that any state change completed immediately */
-		kbase_pm_trace_check_and_finish_state_change(kbdev);
-	}
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
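Note: the policy changes above collapse the per-core bitmaps and per-core count arrays into plain reference counts (shader_needed_cnt, tiler_needed_cnt, l2_users_count), and a core-state update is only triggered on the 0-to-1 and 1-to-0 edges of those counts. The sketch below is a standalone model of that edge-triggered refcount pattern, not driver code; all names are illustrative.

/*
 * Minimal model of edge-triggered reference counting: a transition check is
 * only scheduled when a count goes 0 -> 1 or 1 -> 0. Illustrative only.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct pm_state {
	unsigned int shader_needed_cnt;
	bool change_pending;
};

static void check_transitions(struct pm_state *pm)
{
	/* stands in for kbase_pm_update_cores_state_nolock() */
	pm->change_pending = true;
}

static void request_shaders(struct pm_state *pm)
{
	if (++pm->shader_needed_cnt == 1)
		check_transitions(pm);	/* first user: power request changes */
}

static void release_shaders(struct pm_state *pm)
{
	assert(pm->shader_needed_cnt > 0);
	if (--pm->shader_needed_cnt == 0)
		check_transitions(pm);	/* last user gone: cores may power down */
}

int main(void)
{
	struct pm_state pm = { 0, false };

	request_shaders(&pm);		/* 0 -> 1: triggers a check */
	printf("pending after first request: %d\n", pm.change_pending);

	pm.change_pending = false;
	request_shaders(&pm);		/* 1 -> 2: no new check needed */
	printf("pending after second request: %d\n", pm.change_pending);

	release_shaders(&pm);		/* 2 -> 1: still in use */
	release_shaders(&pm);		/* 1 -> 0: triggers a check */
	printf("pending after final release: %d\n", pm.change_pending);
	return 0;
}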
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
index 852fedd..2e86929 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2015, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -73,11 +73,25 @@
 
 
 /**
- * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ * kbase_pm_request_cores - Request the desired cores to be powered up.
+ * @kbdev:           Kbase device
+ * @tiler_required:  true if tiler is required
+ * @shader_required: true if shaders are required
  *
- * @kbdev:          The kbase device structure for the device
- * @tiler_required: true if the tiler is required, false otherwise
- * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ * Called by the scheduler to request power to the desired cores.
+ *
+ * There is no guarantee that the HW will be powered up on return. Use
+ * kbase_pm_cores_requested()/kbase_pm_cores_ready() to verify that cores are
+ * now powered, or instead call kbase_pm_request_cores_sync().
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev, bool tiler_required,
+		bool shader_required);
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ * @kbdev:           Kbase device
+ * @tiler_required:  true if tiler is required
+ * @shader_required: true if shaders are required
  *
  * When this function returns, the @shader_cores will be in the READY state.
  *
@@ -87,98 +101,79 @@
  * is made.
  */
 void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
-				bool tiler_required, u64 shader_cores);
+		bool tiler_required, bool shader_required);
 
 /**
- * kbase_pm_request_cores - Mark one or more cores as being required
- *                          for jobs to be submitted
+ * kbase_pm_release_cores - Request the desired cores to be powered down.
+ * @kbdev:           Kbase device
+ * @tiler_required:  true if tiler is required
+ * @shader_required: true if shaders are required
  *
- * @kbdev:          The kbase device structure for the device
- * @tiler_required: true if the tiler is required, false otherwise
- * @shader_cores:   A bitmask of shader cores which are necessary for the job
- *
- * This function is called by the job scheduler to mark one or more cores as
- * being required to submit jobs that are ready to run.
- *
- * The cores requested are reference counted and a subsequent call to
- * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
- * made to dereference the cores as being 'needed'.
- *
- * The active power policy will meet or exceed the requirements of the
- * requested cores in the system. Any core transitions needed will be begun
- * immediately, but they might not complete/the cores might not be available
- * until a Power Management IRQ.
- *
- * Return: 0 if the cores were successfully requested, or -errno otherwise.
+ * Called by the scheduler to release its power reference on the desired cores.
  */
-void kbase_pm_request_cores(struct kbase_device *kbdev,
-				bool tiler_required, u64 shader_cores);
+void kbase_pm_release_cores(struct kbase_device *kbdev, bool tiler_required,
+		bool shader_required);
 
 /**
- * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
- *                            jobs to be submitted.
+ * kbase_pm_cores_requested - Check that a power request has been locked into
+ *                            the HW.
+ * @kbdev:           Kbase device
+ * @tiler_required:  true if tiler is required
+ * @shader_required: true if shaders are required
  *
- * @kbdev:          The kbase device structure for the device
- * @tiler_required: true if the tiler is required, false otherwise
- * @shader_cores:   A bitmask of shader cores (as given to
- *                  kbase_pm_request_cores() )
+ * Called by the scheduler to check if a power on request has been locked into
+ * the HW.
  *
- * This function undoes the effect of kbase_pm_request_cores(). It should be
- * used when a job is not going to be submitted to the hardware (e.g. the job is
- * cancelled before it is enqueued).
+ * Note that there is no guarantee that the cores are actually ready; however,
+ * once the request has been locked into the HW it is safe to submit work,
+ * since the HW will wait for the transition to ready.
  *
- * The active power policy will meet or exceed the requirements of the
- * requested cores in the system. Any core transitions needed will be begun
- * immediately, but they might not complete until a Power Management IRQ.
+ * A reference must be taken prior to making this call.
  *
- * The policy may use this as an indication that it can power down cores.
+ * Caller must hold the hwaccess_lock.
+ *
+ * Return: true if the request to the HW was successfully made, false if the
+ *         request is still pending.
  */
-void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
-				bool tiler_required, u64 shader_cores);
+static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev,
+		bool tiler_required, bool shader_required)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if ((shader_required && !kbdev->shader_available_bitmap) ||
+			(tiler_required && !kbdev->tiler_available_bitmap))
+		return false;
+
+	return true;
+}
 
 /**
- * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ * kbase_pm_cores_ready - Check that the required cores have been powered on
+ *                        by the HW.
+ * @kbdev:           Kbase device
+ * @tiler_required:  true if tiler is required
+ * @shader_required: true if shaders are required
  *
- * @kbdev:          The kbase device structure for the device
- * @tiler_required: true if the tiler is required, false otherwise
- * @shader_cores:   A bitmask of shader cores (as given to
- *                  kbase_pm_request_cores() )
+ * Called by the scheduler to check if cores are ready.
  *
- * This function should be called after kbase_pm_request_cores() when the job
- * is about to be submitted to the hardware. It will check that the necessary
- * cores are available and if so update the 'needed' and 'inuse' bitmasks to
- * reflect that the job is now committed to being run.
+ * Note that the caller should ensure that they have first requested cores
+ * before calling this function.
  *
- * If the necessary cores are not currently available then the function will
- * return %KBASE_CORES_NOT_READY and have no effect.
+ * Caller must hold the hwaccess_lock.
  *
- * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
- *
- *         %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
- *
- *         %KBASE_CORES_READY if the cores requested are already available
+ * Return: true if the cores are ready.
  */
-enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
-						struct kbase_device *kbdev,
-						bool tiler_required,
-						u64 shader_cores);
+static inline bool kbase_pm_cores_ready(struct kbase_device *kbdev,
+		bool tiler_required, bool shader_required)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-/**
- * kbase_pm_release_cores - Release cores after a job has run
- *
- * @kbdev:          The kbase device structure for the device
- * @tiler_required: true if the tiler is required, false otherwise
- * @shader_cores:   A bitmask of shader cores (as given to
- *                  kbase_pm_register_inuse_cores() )
- *
- * This function should be called when a job has finished running on the
- * hardware. A call to kbase_pm_register_inuse_cores() must have previously
- * occurred. The reference counts of the specified cores will be decremented
- * which may cause the bitmask of 'inuse' cores to be reduced. The power policy
- * may then turn off any cores which are no longer 'inuse'.
- */
-void kbase_pm_release_cores(struct kbase_device *kbdev,
-				bool tiler_required, u64 shader_cores);
+	if ((shader_required && !kbdev->shader_ready_bitmap) ||
+			(tiler_required && !kbdev->tiler_available_bitmap))
+		return false;
+
+	return true;
+}
 
 /**
  * kbase_pm_request_l2_caches - Request l2 caches
@@ -200,6 +195,26 @@
 void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
 
 /**
+ * kbase_pm_request_l2_caches_nolock - Request l2 caches, nolock version
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups and power them up without
+ * waiting for the power manager to actually power up the cores. This is done
+ * because this function is called from within an atomic context, and whether
+ * the l2 caches have actually powered up is checked at a later stage.
+ * The reference taken on the l2 caches is dropped when the protected mode
+ * atom is released, so there is no need to make a matching call to
+ * release_l2_caches().
+ *
+ * This function is used specifically for the case when l2 caches are
+ * to be powered up as part of the sequence for entering protected mode.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches_nolock(struct kbase_device *kbdev);
+
+/**
  * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
  *
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
@@ -212,7 +227,7 @@
 void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
 
 /**
- * kbase_pm_request_l2_caches - Release l2 caches
+ * kbase_pm_release_l2_caches - Release l2 caches
  *
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
  *
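Note: putting the new helpers together, a caller is expected to take a reference with kbase_pm_request_cores(), check kbase_pm_cores_requested()/kbase_pm_cores_ready() before submitting, and drop the reference with kbase_pm_release_cores(). The fragment below is a usage sketch only, not taken from the driver: it assumes a scheduler-side context with a valid kbase_device, simplifies locking and retry handling, and submit_job_to_hw() is a hypothetical placeholder.

/* Hypothetical helper assumed to exist elsewhere in this sketch. */
extern void submit_job_to_hw(struct kbase_device *kbdev);

/*
 * Usage sketch only (not from the driver): how a submission path might
 * combine the request/requested/release helpers under hwaccess_lock.
 */
static bool try_submit_with_cores(struct kbase_device *kbdev)
{
	unsigned long flags;
	bool submitted = false;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

	/* Take a reference on the tiler and shader cores. */
	kbase_pm_request_cores(kbdev, true, true);

	/* The request may not be locked into the HW yet; only submit once it
	 * is, since the HW will then wait for the cores to become ready. */
	if (kbase_pm_cores_requested(kbdev, true, true)) {
		submit_job_to_hw(kbdev);	/* hypothetical */
		submitted = true;
	} else {
		/* Drop the reference and retry later (e.g. from the PM IRQ). */
		kbase_pm_release_cores(kbdev, true, true);
	}

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
	return submitted;
}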
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
index cef0745..5e1b761 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -35,24 +35,20 @@
 	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
 	 * correctly */
 	do {
-		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
-									NULL);
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI));
 		*cycle_counter = kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
-		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
-									NULL);
+					GPU_CONTROL_REG(CYCLE_COUNT_LO));
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI));
 		*cycle_counter |= (((u64) hi1) << 32);
 	} while (hi1 != hi2);
 
 	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
 	 * correctly */
 	do {
-		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
-									NULL);
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI));
 		*system_time = kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
-		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
-									NULL);
+					GPU_CONTROL_REG(TIMESTAMP_LO));
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI));
 		*system_time |= (((u64) hi1) << 32);
 	} while (hi1 != hi2);
 
@@ -64,7 +60,7 @@
 
 /**
  * kbase_wait_write_flush -  Wait for GPU write flush
- * @kctx: Context pointer
+ * @kbdev: Kbase device
  *
  * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
  * its write buffer.
@@ -75,7 +71,7 @@
  * not be as expected.
  */
 #ifndef CONFIG_MALI_NO_MALI
-void kbase_wait_write_flush(struct kbase_context *kctx)
+void kbase_wait_write_flush(struct kbase_device *kbdev)
 {
 	u32 base_count = 0;
 
@@ -83,14 +79,14 @@
 	 * The caller must be holding onto the kctx or the call is from
 	 * userspace.
 	 */
-	kbase_pm_context_active(kctx->kbdev);
-	kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+	kbase_pm_context_active(kbdev);
+	kbase_pm_request_gpu_cycle_counter(kbdev);
 
 	while (true) {
 		u32 new_count;
 
-		new_count = kbase_reg_read(kctx->kbdev,
-					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		new_count = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO));
 		/* First time around, just store the count. */
 		if (base_count == 0) {
 			base_count = new_count;
@@ -102,7 +98,7 @@
 			break;
 	}
 
-	kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
-	kbase_pm_context_idle(kctx->kbdev);
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+	kbase_pm_context_idle(kbdev);
 }
 #endif				/* CONFIG_MALI_NO_MALI */
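Note: the cycle-counter and timestamp readers above use the hi/lo/hi sequence to read a 64-bit value exposed as two 32-bit registers without tearing; re-reading the high half detects a low-half rollover that happened mid-read. The following standalone sketch models that retry loop; it is not driver code and the names are invented.

/*
 * Standalone model of the hi/lo/hi read sequence for a split 64-bit counter.
 * Names are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t counter;		/* stands in for the HW counter */

static uint32_t read_lo(void) { return (uint32_t)counter; }
static uint32_t read_hi(void) { return (uint32_t)(counter >> 32); }

static uint64_t read_counter64(void)
{
	uint32_t hi1, hi2, lo;

	do {
		hi1 = read_hi();
		lo = read_lo();
		hi2 = read_hi();
	} while (hi1 != hi2);		/* high half changed mid-read: retry */

	return ((uint64_t)hi1 << 32) | lo;
}

int main(void)
{
	counter = 0x00000001ffffffffULL;
	printf("read: %llx\n", (unsigned long long)read_counter64());
	return 0;
}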
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
index e1bd263..ece70092 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -36,7 +36,7 @@
 
 /**
  * kbase_wait_write_flush() -  Wait for GPU write flush
- * @kctx:	Context pointer
+ * @kbdev:	Kbase device
  *
  * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
  * its write buffer.
@@ -47,11 +47,11 @@
  * This function is only in use for BASE_HW_ISSUE_6367
  */
 #ifdef CONFIG_MALI_NO_MALI
-static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+static inline void kbase_wait_write_flush(struct kbase_device *kbdev)
 {
 }
 #else
-void kbase_wait_write_flush(struct kbase_context *kctx);
+void kbase_wait_write_flush(struct kbase_device *kbdev);
 #endif
 
 #endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/drivers/gpu/arm/midgard/build.bp b/drivers/gpu/arm/midgard/build.bp
index 4517b46..ada6920 100644
--- a/drivers/gpu/arm/midgard/build.bp
+++ b/drivers/gpu/arm/midgard/build.bp
@@ -3,13 +3,17 @@
  * ----------------------------------------------------------------------------
  * This confidential and proprietary software may be used only as authorized
  * by a licensing agreement from ARM Limited.
- *      (C) COPYRIGHT 2017 ARM Limited, ALL RIGHTS RESERVED
+ *      (C) COPYRIGHT 2017-2018 ARM Limited, ALL RIGHTS RESERVED
  * The entire notice above must be reproduced on all authorized copies and
  * copies may only be made to the extent permitted by a licensing agreement
  * from ARM Limited.
  * ----------------------------------------------------------------------------
  */
 
+/* Kernel-side tests may include mali_kbase's headers. Therefore any config
+ * options which affect the sizes of any structs (e.g. adding extra members)
+ * must be included in these defaults, so that the structs are consistent in
+ * both mali_kbase and the test modules. */
 bob_defaults {
     name: "mali_kbase_shared_config_defaults",
     no_mali: {
@@ -21,7 +25,33 @@
     mali_devfreq: {
         kbuild_options: ["CONFIG_MALI_DEVFREQ=y"],
     },
-
+    mali_midgard_dvfs: {
+        kbuild_options: ["CONFIG_MALI_MIDGARD_DVFS=y"],
+    },
+    mali_debug: {
+        kbuild_options: ["CONFIG_MALI_DEBUG=y"],
+    },
+    mali_fpga_bus_logger: {
+        kbuild_options: ["CONFIG_MALI_FPGA_BUS_LOGGER=y"],
+    },
+    cinstr_job_dump: {
+        kbuild_options: ["CONFIG_MALI_JOB_DUMP=y"],
+    },
+    mali_gator_support: {
+        kbuild_options: ["CONFIG_MALI_GATOR_SUPPORT=y"],
+    },
+    mali_system_trace: {
+        kbuild_options: ["CONFIG_MALI_SYSTEM_TRACE=y"],
+    },
+    mali_pwrsoft_765: {
+        kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"],
+    },
+    kbuild_options: [
+        "MALI_UNIT_TEST={{.unit_test_code}}",
+        "MALI_CUSTOMER_RELEASE={{.release}}",
+        "MALI_USE_CSF={{.gpu_has_csf}}",
+        "MALI_KERNEL_TEST_API={{.debug}}",
+    ],
     defaults: ["kernel_defaults"],
 }
 
@@ -48,48 +78,31 @@
         "CONFIG_MALI_MIDGARD=m",
         "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
         "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
-        "MALI_KERNEL_TEST_API={{.unit_test_code}}",
         "MALI_MOCK_TEST={{.mali_mock_test}}",
-        "MALI_UNIT_TEST={{.unit_test_code}}",
     ],
-    cinstr_job_dump: {
-        kbuild_options: ["CONFIG_MALI_JOB_DUMP=y"],
-    },
-    mali_debug: {
-        kbuild_options: ["CONFIG_MALI_DEBUG=y"],
-    },
-    mali_gator_support: {
-        kbuild_options: ["CONFIG_MALI_GATOR_SUPPORT=y"],
-    },
-    mali_system_trace: {
-        kbuild_options: ["CONFIG_MALI_SYSTEM_TRACE=y"],
-    },
     mali_error_inject: {
         kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"],
     },
     mali_error_inject_random: {
         kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"],
     },
-    mali_trace_timeline: {
-        kbuild_options: ["CONFIG_MALI_TRACE_TIMELINE=y"],
-    },
-    mali_prfcnt_set_secondary: {
+    cinstr_secondary_hwc: {
         kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"],
     },
-    mali_fpga_bus_logger: {
-        kbuild_options: ["CONFIG_MALI_FPGA_BUS_LOGGER=y"],
-    },
-    mali_midgard_dvfs: {
-        kbuild_options: ["CONFIG_MALI_MIDGARD_DVFS=y"],
-    },
     mali_2mb_alloc: {
         kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
     },
     mali_mock_test: {
         srcs: ["tests/internal/src/mock/mali_kbase_pm_driver_mock.c"],
     },
-    ump: {
-        extra_symbols: ["ump"],
+    gpu_has_csf: {
+        srcs: [
+            "csf/*.c",
+            "csf/*.h",
+            "csf/Kbuild",
+        ],
     },
     defaults: ["mali_kbase_shared_config_defaults"],
 }
+
+optional_subdirs = ["tests"]
diff --git a/drivers/gpu/arm/midgard/docs/Doxyfile b/drivers/gpu/arm/midgard/docs/Doxyfile
index cea7bd9..6498dcb 100644
--- a/drivers/gpu/arm/midgard/docs/Doxyfile
+++ b/drivers/gpu/arm/midgard/docs/Doxyfile
@@ -38,7 +38,7 @@
 # directories like "/usr/src/myproject". Separate the files or directories
 # with spaces.
 
-INPUT                  += ../../kernel/drivers/gpu/arm/midgard/ 
+INPUT                  += ../../kernel/drivers/gpu/arm/midgard/
 
 ##############################################################################
 # Everything below here is optional, and in most cases not required
diff --git a/drivers/gpu/arm/midgard/ipa/Kbuild b/drivers/gpu/arm/midgard/ipa/Kbuild
index fcc079c..3d9cf80 100644
--- a/drivers/gpu/arm/midgard/ipa/Kbuild
+++ b/drivers/gpu/arm/midgard/ipa/Kbuild
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -21,13 +21,8 @@
 
 mali_kbase-y += \
 	ipa/mali_kbase_ipa_simple.o \
-	ipa/mali_kbase_ipa.o
-
-mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
-
-ifneq ($(wildcard $(src)/ipa/mali_kbase_ipa_vinstr_g71.c),)
-  mali_kbase-y += \
-	ipa/mali_kbase_ipa_vinstr_g71.o \
+	ipa/mali_kbase_ipa.o \
+	ipa/mali_kbase_ipa_vinstr_g7x.o \
 	ipa/mali_kbase_ipa_vinstr_common.o
 
-endif
+mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
\ No newline at end of file
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
index d16069f..15566f6 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -26,6 +26,7 @@
 #include "mali_kbase_ipa.h"
 #include "mali_kbase_ipa_debugfs.h"
 #include "mali_kbase_ipa_simple.h"
+#include "backend/gpu/mali_kbase_pm_internal.h"
 
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
 #include <linux/pm_opp.h>
@@ -38,10 +39,15 @@
 
 #define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model"
 #define KBASE_IPA_G71_MODEL_NAME      "mali-g71-power-model"
+#define KBASE_IPA_G72_MODEL_NAME      "mali-g72-power-model"
+#define KBASE_IPA_TNOX_MODEL_NAME     "mali-tnox-power-model"
+#define KBASE_IPA_TGOX_R1_MODEL_NAME  "mali-tgox_r1-power-model"
 
 static struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
 	&kbase_simple_ipa_model_ops,
-	&kbase_g71_ipa_model_ops
+	&kbase_g71_ipa_model_ops,
+	&kbase_g72_ipa_model_ops,
+	&kbase_tnox_ipa_model_ops
 };
 
 int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
@@ -79,16 +85,6 @@
 	return NULL;
 }
 
-void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev)
-{
-	atomic_set(&kbdev->ipa_use_configured_model, false);
-}
-
-void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev)
-{
-	atomic_set(&kbdev->ipa_use_configured_model, true);
-}
-
 const char *kbase_ipa_model_name_from_id(u32 gpu_id)
 {
 	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
@@ -98,6 +94,17 @@
 		switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
 		case GPU_ID2_PRODUCT_TMIX:
 			return KBASE_IPA_G71_MODEL_NAME;
+		case GPU_ID2_PRODUCT_THEX:
+			return KBASE_IPA_G72_MODEL_NAME;
+		case GPU_ID2_PRODUCT_TNOX:
+			return KBASE_IPA_TNOX_MODEL_NAME;
+		case GPU_ID2_PRODUCT_TGOX:
+			if ((gpu_id & GPU_ID2_VERSION_MAJOR) ==
+					(0 << GPU_ID2_VERSION_MAJOR_SHIFT))
+				/* TGOX r0 shares a power model with TNOX */
+				return KBASE_IPA_TNOX_MODEL_NAME;
+			else
+				return KBASE_IPA_TGOX_R1_MODEL_NAME;
 		default:
 			return KBASE_IPA_FALLBACK_MODEL_NAME;
 		}
@@ -305,14 +312,6 @@
 	/* The simple IPA model must *always* be present.*/
 	ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME);
 
-	if (!ops->do_utilization_scaling_in_framework) {
-		dev_err(kbdev->dev,
-			"Fallback IPA model %s should not account for utilization\n",
-			ops->name);
-		err = -EINVAL;
-		goto end;
-	}
-
 	default_model = kbase_ipa_init_model(kbdev, ops);
 	if (!default_model) {
 		err = -EINVAL;
@@ -353,8 +352,6 @@
 		kbdev->ipa.configured_model = default_model;
 	}
 
-	kbase_ipa_model_use_configured_locked(kbdev);
-
 end:
 	if (err)
 		kbase_ipa_term_locked(kbdev);
@@ -443,14 +440,40 @@
 	return div_u64(v3c_big, 1000000);
 }
 
+void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Record the event of GPU entering protected mode. */
+	kbdev->ipa_protection_mode_switched = true;
+}
+
 static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev)
 {
+	struct kbase_ipa_model *model;
+	unsigned long flags;
+
 	lockdep_assert_held(&kbdev->ipa.lock);
 
-	if (atomic_read(&kbdev->ipa_use_configured_model))
-		return kbdev->ipa.configured_model;
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (kbdev->ipa_protection_mode_switched)
+		model = kbdev->ipa.fallback_model;
 	else
-		return kbdev->ipa.fallback_model;
+		model = kbdev->ipa.configured_model;
+
+	/*
+	 * Now that we have taken note of whether the GPU entered protected
+	 * mode earlier, the event can be reset (provided the GPU is not
+	 * currently in protected mode) so that the configured model is used
+	 * for the next sample.
+	 */
+	if (!kbdev->protected_mode)
+		kbdev->ipa_protection_mode_switched = false;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return model;
 }
 
 static u32 get_static_power_locked(struct kbase_device *kbdev,
@@ -532,7 +555,7 @@
 
 	model = kbdev->ipa.fallback_model;
 
-	err = model->ops->get_dynamic_coeff(model, &power_coeff, freq);
+	err = model->ops->get_dynamic_coeff(model, &power_coeff);
 
 	if (!err)
 		power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
@@ -551,48 +574,63 @@
 	return power;
 }
 
-int kbase_get_real_power(struct devfreq *df, u32 *power,
+int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
 				unsigned long freq,
 				unsigned long voltage)
 {
 	struct kbase_ipa_model *model;
 	u32 power_coeff = 0;
 	int err = 0;
-	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+	struct kbasep_pm_metrics diff;
+	u64 total_time;
 
-	mutex_lock(&kbdev->ipa.lock);
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->ipa.last_metrics, &diff);
 
 	model = get_current_model(kbdev);
 
-	err = model->ops->get_dynamic_coeff(model, &power_coeff, freq);
+	err = model->ops->get_dynamic_coeff(model, &power_coeff);
 
-	/* If we switch to protected model between get_current_model() and
-	 * get_dynamic_coeff(), counter reading could fail. If that happens
-	 * (unlikely, but possible), revert to the fallback model. */
+	/* If the counter model returns an error (e.g. switching back to
+	 * protected mode and failing to read counters, or a counter sample
+	 * with too few cycles), revert to the fallback model.
+	 */
 	if (err && model != kbdev->ipa.fallback_model) {
 		model = kbdev->ipa.fallback_model;
-		err = model->ops->get_dynamic_coeff(model, &power_coeff, freq);
+		err = model->ops->get_dynamic_coeff(model, &power_coeff);
 	}
 
 	if (err)
-		goto exit_unlock;
+		return err;
 
 	*power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
 
-	if (model->ops->do_utilization_scaling_in_framework) {
-		struct devfreq_dev_status *status = &df->last_status;
-		unsigned long total_time = max(status->total_time, 1ul);
-		u64 busy_time = min(status->busy_time, total_time);
-
-		*power = div_u64((u64) *power * (u64) busy_time, total_time);
-	}
+	/* time_busy / total_time cannot be >1, so assigning the 64-bit
+	 * result of div_u64 to *power cannot overflow.
+	 */
+	total_time = diff.time_busy + (u64) diff.time_idle;
+	*power = div_u64(*power * (u64) diff.time_busy,
+			 max(total_time, 1ull));
 
 	*power += get_static_power_locked(kbdev, model, voltage);
 
-exit_unlock:
+	return err;
+}
+KBASE_EXPORT_TEST_API(kbase_get_real_power_locked);
+
+int kbase_get_real_power(struct devfreq *df, u32 *power,
+				unsigned long freq,
+				unsigned long voltage)
+{
+	int ret;
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+
+	mutex_lock(&kbdev->ipa.lock);
+	ret = kbase_get_real_power_locked(kbdev, power, freq, voltage);
 	mutex_unlock(&kbdev->ipa.lock);
 
-	return err;
+	return ret;
 }
 KBASE_EXPORT_TEST_API(kbase_get_real_power);
 
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
index 736399a..4656ded 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,8 +27,17 @@
 
 struct devfreq;
 
+/**
+ * struct kbase_ipa_model - Object describing a particular IPA model.
+ * @kbdev:                    pointer to kbase device
+ * @model_data:               opaque pointer to model specific data, accessed
+ *                            only by model specific methods.
+ * @ops:                      pointer to object containing model specific methods.
+ * @params:                   head of the list of debugfs params added for model
+ * @missing_dt_node_warning:  flag used to limit the "matching power model
+ *                            device tree node not found" warning to a single
+ *                            occurrence.
+ */
 struct kbase_ipa_model {
-	struct list_head link;
 	struct kbase_device *kbdev;
 	void *model_data;
 	struct kbase_ipa_model_ops *ops;
@@ -87,8 +96,6 @@
 	 * get_dynamic_coeff() - calculate dynamic power coefficient
 	 * @model:		pointer to model
 	 * @coeffp:		pointer to return value location
-	 * @current_freq:	frequency the GPU has been running at for the
-	 *			previous sampling period.
 	 *
 	 * Calculate a dynamic power coefficient, with units pW/(Hz V^2), which
 	 * is then scaled by the IPA framework according to the current OPP's
@@ -96,8 +103,7 @@
 	 *
 	 * Return: 0 on success, or an error code.
 	 */
-	int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp,
-				 u32 current_freq);
+	int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
 	/*
 	 * get_static_coeff() - calculate static power coefficient
 	 * @model:		pointer to model
@@ -109,31 +115,79 @@
 	 * Return: 0 on success, or an error code.
 	 */
 	int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
-	/* If false, the model's get_dynamic_coeff() method accounts for how
-	 * long the GPU was active over the sample period. If true, the
-	 * framework will scale the calculated power according to the
-	 * utilization stats recorded by devfreq in get_real_power(). */
-	bool do_utilization_scaling_in_framework;
 };
 
-/* Models can be registered only in the platform's platform_init_func call */
-int kbase_ipa_model_ops_register(struct kbase_device *kbdev,
-			     struct kbase_ipa_model_ops *new_model_ops);
-struct kbase_ipa_model *kbase_ipa_get_model(struct kbase_device *kbdev,
-					    const char *name);
-
+/**
+ * kbase_ipa_init - Initialize the IPA feature
+ * @kbdev:      pointer to kbase device
+ *
+ * The simple IPA power model is initialized as a fallback model; if that
+ * initialization fails then IPA is not used.
+ * The device tree is then read for the name of the IPA model to use, via the
+ * property string "ipa-model". If that model is supported it is initialized,
+ * but if its initialization fails the simple power model is used instead.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
 int kbase_ipa_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_term - Terminate the IPA feature
+ * @kbdev:      pointer to kbase device
+ *
+ * Both simple IPA power model and model retrieved from device tree are
+ * terminated.
+ */
 void kbase_ipa_term(struct kbase_device *kbdev);
-void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev);
-void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_model_recalculate - Recalculate the model coefficients
+ * @model:      pointer to the IPA model object, already initialized
+ *
+ * It shall be called immediately after the model has been initialized
+ * or when a model parameter has changed, so that any coefficients
+ * derived from the parameters can be recalculated.
+ * It is a wrapper for the model-specific recalculate() method.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
 int kbase_ipa_model_recalculate(struct kbase_ipa_model *model);
+
+/**
+ * kbase_ipa_init_model - Initialize a particular IPA model
+ * @kbdev:      pointer to kbase device
+ * @ops:        pointer to object containing model specific methods.
+ *
+ * Initialize the model corresponding to the @ops pointer passed.
+ * The init() method specified in @ops is called.
+ *
+ * Return: pointer to kbase_ipa_model on success, NULL on error
+ */
 struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
 					     struct kbase_ipa_model_ops *ops);
+/**
+ * kbase_ipa_term_model - Terminate the particular IPA model
+ * @model:      pointer to the IPA model object, already initialized
+ *
+ * Terminate the model using its term() method.
+ * Model-specific parameters are freed.
+ */
 void kbase_ipa_term_model(struct kbase_ipa_model *model);
 
-extern struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
+/**
+ * kbase_ipa_protection_mode_switch_event - Inform IPA of the GPU's entry into
+ *                                          protected mode
+ * @kbdev:      pointer to kbase device
+ *
+ * Makes IPA aware of the GPU switching to protected mode.
+ */
+void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev);
 
-#if MALI_UNIT_TEST
+extern struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
+extern struct kbase_ipa_model_ops kbase_g72_ipa_model_ops;
+extern struct kbase_ipa_model_ops kbase_tnox_ipa_model_ops;
+extern struct kbase_ipa_model_ops kbase_tgox_r1_ipa_model_ops;
+
 /**
  * kbase_get_real_power() - get the real power consumption of the GPU
  * @df: dynamic voltage and frequency scaling information for the GPU.
@@ -141,14 +195,22 @@
  * @freq: a frequency, in HZ.
  * @voltage: a voltage, in mV.
  *
- * This function is only exposed for use by unit tests. The returned value
- * incorporates both static and dynamic power consumption.
+ * The returned value incorporates both static and dynamic power consumption.
  *
  * Return: 0 on success, or an error code.
  */
 int kbase_get_real_power(struct devfreq *df, u32 *power,
 				unsigned long freq,
 				unsigned long voltage);
+
+#if MALI_UNIT_TEST
+/* Called by kbase_get_real_power() to invoke the power models.
+ * Must be called with kbdev->ipa.lock held.
+ * This function is only exposed for use by unit tests.
+ */
+int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
+				unsigned long freq,
+				unsigned long voltage);
 #endif /* MALI_UNIT_TEST */
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
@@ -159,10 +221,7 @@
 
 #else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
 
-static inline void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev)
-{ }
-
-static inline void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev)
+static inline void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev)
 { }
 
 #endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
index 029023c..071a530 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -188,6 +188,23 @@
 	}
 }
 
+static int current_power_get(void *data, u64 *val)
+{
+	struct kbase_device *kbdev = data;
+	struct devfreq *df = kbdev->devfreq;
+	u32 power;
+
+	kbase_pm_context_active(kbdev);
+	kbase_get_real_power(df, &power,
+		kbdev->current_nominal_freq, (kbdev->current_voltage / 1000));
+	kbase_pm_context_idle(kbdev);
+
+	*val = power;
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(current_power, current_power_get, NULL, "%llu\n");
+
 static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model)
 {
 	struct list_head *it;
@@ -264,5 +281,8 @@
 		kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model);
 	kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model);
 
+	debugfs_create_file("ipa_current_power", 0444,
+			kbdev->mali_debugfs_directory, kbdev, &current_power);
+
 	mutex_unlock(&kbdev->ipa.lock);
 }
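The new ipa_current_power node can be sampled from user space. A minimal reader sketch follows; the path assumes debugfs is mounted in its usual location and that the kbase debugfs directory is named "mali0", neither of which is defined by this patch.

#include <stdio.h>

int main(void)
{
	/* Path is an assumption; adjust to the actual debugfs mount point
	 * and device directory name on the target system. */
	FILE *f = fopen("/sys/kernel/debug/mali0/ipa_current_power", "r");
	unsigned long long power;

	if (!f) {
		perror("ipa_current_power");
		return 1;
	}
	if (fscanf(f, "%llu", &power) == 1)
		printf("modelled power: %llu\n", power);
	fclose(f);
	return 0;
}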
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
index 639ade2..e684df4a 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -43,7 +43,7 @@
 	struct thermal_zone_device *tz,
 	unsigned long *temp)
 {
-	*temp = ACCESS_ONCE(dummy_temp);
+	*temp = READ_ONCE(dummy_temp);
 	return 0;
 }
 
@@ -54,7 +54,7 @@
 	struct thermal_zone_device *tz,
 	int *temp)
 {
-	*temp = ACCESS_ONCE(dummy_temp);
+	*temp = READ_ONCE(dummy_temp);
 	return 0;
 }
 #endif
@@ -68,7 +68,7 @@
 
 void kbase_simple_power_model_set_dummy_temp(int temp)
 {
-	ACCESS_ONCE(dummy_temp) = temp;
+	WRITE_ONCE(dummy_temp, temp);
 }
 KBASE_EXPORT_TEST_API(kbase_simple_power_model_set_dummy_temp);
 
@@ -155,7 +155,7 @@
 #endif
 
 	while (!kthread_should_stop()) {
-		struct thermal_zone_device *tz = ACCESS_ONCE(model_data->gpu_tz);
+		struct thermal_zone_device *tz = READ_ONCE(model_data->gpu_tz);
 
 		if (tz) {
 			int ret;
@@ -170,9 +170,9 @@
 			temp = FALLBACK_STATIC_TEMPERATURE;
 		}
 
-		ACCESS_ONCE(model_data->current_temperature) = temp;
+		WRITE_ONCE(model_data->current_temperature, temp);
 
-		msleep_interruptible(ACCESS_ONCE(model_data->temperature_poll_interval_ms));
+		msleep_interruptible(READ_ONCE(model_data->temperature_poll_interval_ms));
 	}
 
 	return 0;
@@ -186,7 +186,7 @@
 	u64 coeff_big;
 	int temp;
 
-	temp = ACCESS_ONCE(model_data->current_temperature);
+	temp = READ_ONCE(model_data->current_temperature);
 
 	/* Range: 0 <= temp_scaling_factor < 2^24 */
 	temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts,
@@ -203,8 +203,7 @@
 	return 0;
 }
 
-static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
-			       u32 current_freq)
+static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
 {
 	struct kbase_ipa_model_simple_data *model_data =
 		(struct kbase_ipa_model_simple_data *) model->model_data;
@@ -347,6 +346,5 @@
 		.term = &kbase_simple_power_model_term,
 		.get_dynamic_coeff = &model_dynamic_coeff,
 		.get_static_coeff = &model_static_coeff,
-		.do_utilization_scaling_in_framework = true,
 };
 KBASE_EXPORT_TEST_API(kbase_simple_ipa_model_ops);
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
index b9a9e57..699252d 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -21,35 +21,15 @@
  */
 
 #include "mali_kbase_ipa_vinstr_common.h"
+#include "mali_kbase_ipa_debugfs.h"
 
-#if MALI_UNIT_TEST
-static ktime_t dummy_time;
+#define DEFAULT_SCALING_FACTOR 5
 
-/* Intercept calls to the kernel function using a macro */
-#ifdef ktime_get
-#undef ktime_get
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
-#define ktime_get() (ACCESS_ONCE(dummy_time))
-
-void kbase_ipa_set_dummy_time(ktime_t t)
-{
-	ACCESS_ONCE(dummy_time) = t;
-}
-KBASE_EXPORT_TEST_API(kbase_ipa_set_dummy_time);
-#else
-#define ktime_get() (READ_ONCE(dummy_time))
-
-void kbase_ipa_set_dummy_time(ktime_t t)
-{
-	WRITE_ONCE(dummy_time, t);
-}
-KBASE_EXPORT_TEST_API(kbase_ipa_set_dummy_time);
-
-#endif
-
-#endif /* MALI_UNIT_TEST */
+/* If the value of GPU_ACTIVE is below this, use the simple model
+ * instead, to avoid extrapolating small amounts of counter data across
+ * large sample periods.
+ */
+#define DEFAULT_MIN_SAMPLE_CYCLES 10000
 
 /**
  * read_hwcnt() - read a counter value
@@ -99,10 +79,32 @@
 		core_mask >>= 1;
 	}
 
-	/* Range: -2^54 < ret < 2^54 */
-	ret *= coeff;
+	/* Range: -2^54 < ret * coeff < 2^54 */
+	return ret * coeff;
+}
 
-	return div_s64(ret, 1000000);
+s64 kbase_ipa_sum_all_memsys_blocks(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+	const u32 num_blocks = kbdev->gpu_props.props.l2_props.num_l2_slices;
+	u32 base = 0;
+	s64 ret = 0;
+	u32 i;
+
+	for (i = 0; i < num_blocks; i++) {
+		/* 0 < counter_value < 2^27 */
+		u32 counter_value = kbase_ipa_read_hwcnt(model_data,
+					       base + counter);
+
+		/* 0 < ret < 2^27 * max_num_memsys_blocks = 2^29 */
+		ret = kbase_ipa_add_saturate(ret, counter_value);
+		base += KBASE_IPA_NR_BYTES_PER_BLOCK;
+	}
+
+	/* Range: -2^51 < ret * coeff < 2^51 */
+	return ret * coeff;
 }
 
 s64 kbase_ipa_single_counter(
@@ -113,16 +115,49 @@
 	const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter);
 
 	/* Range: -2^49 < ret < 2^49 */
-	const s64 multiplied = (s64) counter_value * (s64) coeff;
+	return counter_value * (s64) coeff;
+}
 
-	/* Range: -2^29 < return < 2^29 */
-	return div_s64(multiplied, 1000000);
+/**
+ * kbase_ipa_gpu_active - Inform IPA that GPU is now active
+ * @model_data: Pointer to model data
+ *
+ * This function may cause vinstr to become active.
+ */
+static void kbase_ipa_gpu_active(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (!kbdev->ipa.vinstr_active) {
+		kbdev->ipa.vinstr_active = true;
+		kbase_vinstr_resume_client(model_data->vinstr_cli);
+	}
+}
+
+/**
+ * kbase_ipa_gpu_idle - Inform IPA that GPU is now idle
+ * @model_data: Pointer to model data
+ *
+ * This function may cause vinstr to become idle.
+ */
+static void kbase_ipa_gpu_idle(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->ipa.vinstr_active) {
+		kbase_vinstr_suspend_client(model_data->vinstr_cli);
+		kbdev->ipa.vinstr_active = false;
+	}
 }
 
 int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
 {
 	struct kbase_device *kbdev = model_data->kbdev;
-	struct kbase_uk_hwcnt_reader_setup setup;
+	struct kbase_ioctl_hwcnt_reader_setup setup;
 	size_t dump_size;
 
 	dump_size = kbase_vinstr_dump_size(kbdev);
@@ -145,90 +180,208 @@
 		return -1;
 	}
 
-	model_data->last_sample_read_time = ktime_get();
 	kbase_vinstr_hwc_clear(model_data->vinstr_cli);
 
+	kbdev->ipa.gpu_active_callback = kbase_ipa_gpu_active;
+	kbdev->ipa.gpu_idle_callback = kbase_ipa_gpu_idle;
+	kbdev->ipa.model_data = model_data;
+	kbdev->ipa.vinstr_active = false;
+	/* Suspend vinstr, to ensure that the GPU is powered off until there is
+	 * something to execute.
+	 */
+	kbase_vinstr_suspend_client(model_data->vinstr_cli);
+
 	return 0;
 }
 
 void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
 {
+	struct kbase_device *kbdev = model_data->kbdev;
+
+	kbdev->ipa.gpu_active_callback = NULL;
+	kbdev->ipa.gpu_idle_callback = NULL;
+	kbdev->ipa.model_data = NULL;
+	kbdev->ipa.vinstr_active = false;
+
 	if (model_data->vinstr_cli)
 		kbase_vinstr_detach_client(model_data->vinstr_cli);
+
 	model_data->vinstr_cli = NULL;
 	kfree(model_data->vinstr_buffer);
 	model_data->vinstr_buffer = NULL;
 }
 
-int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
-	u32 current_freq)
+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
 {
 	struct kbase_ipa_model_vinstr_data *model_data =
 			(struct kbase_ipa_model_vinstr_data *)model->model_data;
+	struct kbase_device *kbdev = model_data->kbdev;
 	s64 energy = 0;
 	size_t i;
-	ktime_t now = ktime_get();
-	ktime_t time_since_last_sample =
-			ktime_sub(now, model_data->last_sample_read_time);
-	/* Range: 2^0 < time_since_last_sample_ms < 2^10 (1-1000ms) */
-	s64 time_since_last_sample_ms = ktime_to_ms(time_since_last_sample);
-	u64 coeff = 0;
-	u64 num_cycles;
+	u64 coeff = 0, coeff_mul = 0;
+	u32 active_cycles;
 	int err = 0;
 
+	if (!kbdev->ipa.vinstr_active) {
+		err = -ENODATA;
+		goto err0; /* GPU powered off - no counters to collect */
+	}
+
 	err = kbase_vinstr_hwc_dump(model_data->vinstr_cli,
 				    BASE_HWCNT_READER_EVENT_MANUAL);
 	if (err)
 		goto err0;
 
-	model_data->last_sample_read_time = now;
+	/* Range: from 0 (GPU not used at all) up to the max sampling interval
+	 * (say 1s) * the max GPU frequency (GPU 100% utilized):
+	 * 0 <= active_cycles <= 1s * ~2GHz
+	 * 0 <= active_cycles < 2^31
+	 */
+	active_cycles = model_data->get_active_cycles(model_data);
 
-	/* Range of 'energy' is +/- 2^34 * number of IPA groups, so around
-	 * -2^38 < energy < 2^38 */
+	if (active_cycles < (u32) max(model_data->min_sample_cycles, 0)) {
+		err = -ENODATA;
+		goto err0;
+	}
+
+	/* Range: 1 <= active_cycles < 2^31 */
+	active_cycles = max(1u, active_cycles);
+
+	/* Range of 'energy' is +/- 2^54 * number of IPA groups (~8), so around
+	 * -2^57 < energy < 2^57
+	 */
 	for (i = 0; i < model_data->groups_def_num; i++) {
 		const struct kbase_ipa_group *group = &model_data->groups_def[i];
-		s32 coeff, group_energy;
-
-		coeff = model_data->group_values[i];
-		group_energy = group->op(model_data, coeff, group->counter_block_offset);
+		s32 coeff = model_data->group_values[i];
+		s64 group_energy = group->op(model_data, coeff,
+					     group->counter_block_offset);
 
 		energy = kbase_ipa_add_saturate(energy, group_energy);
 	}
 
-	/* Range: 0 <= coeff < 2^38 */
+	/* Range: 0 <= coeff < 2^57 */
 	if (energy > 0)
 		coeff = energy;
 
-	/* Scale by user-specified factor and divide by 1000. But actually
-	 * cancel the division out, because we want the num_cycles in KHz and
-	 * don't want to lose precision. */
+	/* Range: 0 <= coeff < 2^57 (because active_cycles >= 1). However, this
+	 * can be constrained further: Counter values can only be increased by
+	 * a theoretical maximum of about 64k per clock cycle. Beyond this,
+	 * we'd have to sample every 1ms to avoid them overflowing at the
+	 * lowest clock frequency (say 100MHz). Therefore, we can write the
+	 * range of 'coeff' in terms of active_cycles:
+	 *
+	 * coeff = SUM(coeffN * counterN * num_cores_for_counterN)
+	 * coeff <= SUM(coeffN * counterN) * max_num_cores
+	 * coeff <= num_IPA_groups * max_coeff * max_counter * max_num_cores
+	 *       (substitute max_counter = 2^16 * active_cycles)
+	 * coeff <= num_IPA_groups * max_coeff * 2^16 * active_cycles * max_num_cores
+	 * coeff <=    2^3         *    2^22   * 2^16 * active_cycles * 2^5
+	 * coeff <= 2^46 * active_cycles
+	 *
+	 * So after the division: 0 <= coeff <= 2^46
+	 */
+	coeff = div_u64(coeff, active_cycles);
 
-	/* Range: 0 < coeff < 2^53 */
-	coeff = coeff * model_data->scaling_factor;
+	/* Not all models were derived at the same reference voltage. Voltage
+	 * scaling is done by multiplying by V^2, so we need to *divide* by
+	 * Vref^2 here.
+	 * Range: 0 <= coeff <= 2^49
+	 */
+	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
+	/* Range: 0 <= coeff <= 2^52 */
+	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
 
-	if (time_since_last_sample_ms == 0) {
-		time_since_last_sample_ms = 1;
-	} else if (time_since_last_sample_ms < 0) {
-		err = -ERANGE;
-		goto err0;
-	}
+	/* Scale by user-specified integer factor.
+	 * Range: 0 <= coeff_mul < 2^57
+	 */
+	coeff_mul = coeff * model_data->scaling_factor;
 
-	/* Range: 2^20 < num_cycles < 2^40 mCycles */
-	num_cycles = (u64) current_freq * (u64) time_since_last_sample_ms;
-	/* Range: 2^10 < num_cycles < 2^30 Cycles */
-	num_cycles = div_u64(num_cycles, 1000000);
-
-	/* num_cycles should never be 0 in _normal_ usage (because we expect
-	 * frequencies on the order of MHz and >10ms polling intervals), but
-	 * protect against divide-by-zero anyway. */
-	if (num_cycles == 0)
-		num_cycles = 1;
-
-	/* Range: 0 < coeff < 2^43 */
-	coeff = div_u64(coeff, num_cycles);
+	/* The power models have results with units
+	 * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this
+	 * becomes fW/(Hz V^2), which are the units of coeff_mul. However,
+	 * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide
+	 * by 1000.
+	 * Range: 0 <= coeff_mul < 2^47
+	 */
+	coeff_mul = div_u64(coeff_mul, 1000u);
 
 err0:
 	/* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */
-	*coeffp = clamp(coeff, (u64) 0, (u64) 1 << 16);
+	*coeffp = clamp(coeff_mul, (u64) 0, (u64) 1 << 16);
 	return err;
 }
+
+int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
+				       const struct kbase_ipa_group *ipa_groups_def,
+				       size_t ipa_group_size,
+				       kbase_ipa_get_active_cycles_callback get_active_cycles,
+				       s32 reference_voltage)
+{
+	int err = 0;
+	size_t i;
+	struct kbase_ipa_model_vinstr_data *model_data;
+
+	if (!model || !ipa_groups_def || !ipa_group_size || !get_active_cycles)
+		return -EINVAL;
+
+	model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
+	if (!model_data)
+		return -ENOMEM;
+
+	model_data->kbdev = model->kbdev;
+	model_data->groups_def = ipa_groups_def;
+	model_data->groups_def_num = ipa_group_size;
+	model_data->get_active_cycles = get_active_cycles;
+
+	model->model_data = (void *) model_data;
+
+	for (i = 0; i < model_data->groups_def_num; ++i) {
+		const struct kbase_ipa_group *group = &model_data->groups_def[i];
+
+		model_data->group_values[i] = group->default_value;
+		err = kbase_ipa_model_add_param_s32(model, group->name,
+					&model_data->group_values[i],
+					1, false);
+		if (err)
+			goto exit;
+	}
+
+	model_data->scaling_factor = DEFAULT_SCALING_FACTOR;
+	err = kbase_ipa_model_add_param_s32(model, "scale",
+					    &model_data->scaling_factor,
+					    1, false);
+	if (err)
+		goto exit;
+
+	model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES;
+	err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles",
+					    &model_data->min_sample_cycles,
+					    1, false);
+	if (err)
+		goto exit;
+
+	model_data->reference_voltage = reference_voltage;
+	err = kbase_ipa_model_add_param_s32(model, "reference_voltage",
+					    &model_data->reference_voltage,
+					    1, false);
+	if (err)
+		goto exit;
+
+	err = kbase_ipa_attach_vinstr(model_data);
+
+exit:
+	if (err) {
+		kbase_ipa_model_param_free_all(model);
+		kfree(model_data);
+	}
+	return err;
+}
+
+void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_vinstr_data *model_data =
+			(struct kbase_ipa_model_vinstr_data *)model->model_data;
+
+	kbase_ipa_detach_vinstr(model_data);
+	kfree(model_data);
+}
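To make the fixed-point pipeline above easier to follow, here is a standalone sketch that replays the sequence of operations in kbase_ipa_vinstr_dynamic_coeff() with plain 64-bit arithmetic. All input values are made up; only the order and scale of the operations mirror the driver code.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* All values are invented for illustration. */
	uint64_t energy = 6400000000000ULL; /* summed counter*coeff terms   */
	uint32_t active_cycles = 10000000;  /* GPU_ACTIVE over the period   */
	uint32_t reference_voltage = 800;   /* mV, model's reference OPP    */
	uint32_t scaling_factor = 5;        /* user "scale" parameter       */

	uint64_t coeff = energy / active_cycles;    /* energy per cycle      */
	coeff = (coeff * 1000) / reference_voltage; /* divide by Vref ...    */
	coeff = (coeff * 1000) / reference_voltage; /* ... squared           */

	uint64_t coeff_mul = coeff * scaling_factor; /* user scaling         */
	coeff_mul /= 1000;                           /* fW -> pW/(Hz V^2)    */
	if (coeff_mul > (1ULL << 16))                /* clamp: ~14W at       */
		coeff_mul = 1ULL << 16;              /* 400MHz/750mV         */

	printf("dynamic coefficient: %llu pW/(Hz V^2)\n",
	       (unsigned long long)coeff_mul);
	return 0;
}

With these inputs the result is 5000 pW/(Hz V^2), i.e. roughly 2.5 W once kbase_scale_dynamic_power() applies OPP scaling at 800 MHz and 0.8 V.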
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
index 7233642..0deafae 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -25,9 +25,6 @@
 
 #include "mali_kbase.h"
 
-/* Maximum length for the name of an IPA group. */
-#define KBASE_IPA_MAX_GROUP_NAME_LEN 15
-
 /* Maximum number of IPA groups for an IPA model. */
 #define KBASE_IPA_MAX_GROUP_DEF_NUM  16
 
@@ -41,30 +38,45 @@
 #define KBASE_IPA_NR_BYTES_PER_BLOCK \
 	(KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT)
 
+struct kbase_ipa_model_vinstr_data;
 
+typedef u32 (*kbase_ipa_get_active_cycles_callback)(struct kbase_ipa_model_vinstr_data *);
 
 /**
  * struct kbase_ipa_model_vinstr_data - IPA context per device
  * @kbdev:               pointer to kbase device
  * @groups_def:          Array of IPA groups.
  * @groups_def_num:      Number of elements in the array of IPA groups.
+ * @get_active_cycles:   Callback to return number of active cycles during
+ *                       counter sample period
  * @vinstr_cli:          vinstr client handle
  * @vinstr_buffer:       buffer to dump hardware counters onto
- * @last_sample_read_time: timestamp of last vinstr buffer read
- * @scaling_factor:      user-specified power scaling factor. This is
- *                       interpreted as a fraction where the denominator is
- *                       1000. Range approx 0.0-32.0:
- *                       0 < scaling_factor < 2^15
+ * @reference_voltage:   voltage, in mV, of the operating point used when
+ *                       deriving the power model coefficients. Range approx
+ *                       0.1V - 5V: 2^7 <= reference_voltage <= 2^13 (~= 8V)
+ * @scaling_factor:      User-specified power scaling factor. This is an
+ *                       integer, which is multiplied by the power coefficient
+ *                       just before OPP scaling.
+ *                       Range approx 0-32: 0 < scaling_factor < 2^5
+ * @min_sample_cycles:   If the value of the GPU_ACTIVE counter (the number of
+ *                       cycles the GPU was working) is less than
+ *                       min_sample_cycles, the counter model will return an
+ *                       error, causing the IPA framework to approximate using
+ *                       the cached simple model results instead. This may be
+ *                       more accurate than extrapolating using a very small
+ *                       counter dump.
  */
 struct kbase_ipa_model_vinstr_data {
 	struct kbase_device *kbdev;
 	s32 group_values[KBASE_IPA_MAX_GROUP_DEF_NUM];
 	const struct kbase_ipa_group *groups_def;
 	size_t groups_def_num;
+	kbase_ipa_get_active_cycles_callback get_active_cycles;
 	struct kbase_vinstr_client *vinstr_cli;
 	void *vinstr_buffer;
-	ktime_t last_sample_read_time;
+	s32 reference_voltage;
 	s32 scaling_factor;
+	s32 min_sample_cycles;
 };
 
 /**
@@ -77,38 +89,57 @@
  * @counter_block_offset:  block offset in bytes of the counter used to calculate energy for IPA group
  */
 struct kbase_ipa_group {
-	char name[KBASE_IPA_MAX_GROUP_NAME_LEN + 1];
+	const char *name;
 	s32 default_value;
 	s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32);
 	u32 counter_block_offset;
 };
 
 /**
- * sum_all_shader_cores() - sum a counter over all cores
- * @model_data		pointer to model data
- * @coeff		model coefficient. Unity is ~2^20, so range approx
- * +/- 4.0: -2^22 < coeff < 2^22
- * @counter     offset in bytes of the counter used to calculate energy for IPA group
+ * kbase_ipa_sum_all_shader_cores() - sum a counter over all cores
+ * @model_data:		pointer to model data
+ * @coeff:		model coefficient. Unity is ~2^20, so range approx
+ *			+/- 4.0: -2^22 < coeff < 2^22
+ * @counter:		offset in bytes of the counter used to calculate energy
+ *			for IPA group
  *
  * Calculate energy estimation based on hardware counter `counter'
  * across all shader cores.
  *
- * Return: Sum of counter values. Range: -2^34 < ret < 2^34
+ * Return: Sum of counter values. Range: -2^54 < ret < 2^54
  */
 s64 kbase_ipa_sum_all_shader_cores(
 	struct kbase_ipa_model_vinstr_data *model_data,
 	s32 coeff, u32 counter);
 
 /**
- * sum_single_counter() - sum a single counter
- * @model_data		pointer to model data
- * @coeff		model coefficient. Unity is ~2^20, so range approx
- * +/- 4.0: -2^22 < coeff < 2^22
- * @counter     offset in bytes of the counter used to calculate energy for IPA group
+ * kbase_ipa_sum_all_memsys_blocks() - sum a counter over all mem system blocks
+ * @model_data:		pointer to model data
+ * @coeff:		model coefficient. Unity is ~2^20, so range approx
+ *			+/- 4.0: -2^22 < coeff < 2^22
+ * @counter:		offset in bytes of the counter used to calculate energy
+ *			for IPA group
+ *
+ * Calculate energy estimation based on hardware counter `counter' across all
+ * memory system blocks.
+ *
+ * Return: Sum of counter values. Range: -2^51 < ret < 2^51
+ */
+s64 kbase_ipa_sum_all_memsys_blocks(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter);
+
+/**
+ * kbase_ipa_single_counter() - sum a single counter
+ * @model_data:		pointer to model data
+ * @coeff:		model coefficient. Unity is ~2^20, so range approx
+ *			+/- 4.0: -2^22 < coeff < 2^22
+ * @counter:		offset in bytes of the counter used to calculate energy
+ *			for IPA group
  *
  * Calculate energy estimation based on hardware counter `counter'.
  *
- * Return: Counter value. Range: -2^34 < ret < 2^34
+ * Return: Counter value. Range: -2^49 < ret < 2^49
  */
 s64 kbase_ipa_single_counter(
 	struct kbase_ipa_model_vinstr_data *model_data,
@@ -139,9 +170,6 @@
  * @model:		pointer to instantiated model
  * @coeffp:		pointer to location where calculated power, in
  *			pW/(Hz V^2), is stored.
- * @current_freq:	frequency the GPU has been running at over the sample
- *			period. In Hz. Range: 10 MHz < 1GHz,
- *			2^20 < current_freq < 2^30
  *
  * This is a GPU-agnostic implementation of the get_dynamic_coeff()
  * function of an IPA model. It relies on the model being populated
@@ -149,20 +177,39 @@
  *
  * Return: 0 on success, or an error code.
  */
-int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
-	u32 current_freq);
+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp);
 
-#if MALI_UNIT_TEST
 /**
- * kbase_ipa_set_dummy_time() - set a dummy monotonic time value
- * @t: a monotonic time value
+ * kbase_ipa_vinstr_common_model_init() - initialize ipa power model
+ * @model:		ipa power model to initialize
+ * @ipa_groups_def:	array of ipa groups which sets coefficients for
+ *			the corresponding counters used in the ipa model
+ * @ipa_group_size:     number of elements in the array @ipa_groups_def
+ * @get_active_cycles:  callback to return the number of cycles the GPU was
+ *			active during the counter sample period.
+ * @reference_voltage:  voltage, in mV, of the operating point used when
+ *                      deriving the power model coefficients.
  *
- * This is only intended for use in unit tests, to ensure that the kernel time
- * values used by a power model are predictable. Deterministic behavior is
- * necessary to allow validation of the dynamic power values computed by the
- * model.
+ * This initialization function performs initialization steps common
+ * for ipa models based on counter values. In each call, the model
+ * passes its specific coefficient values per ipa counter group via
+ * @ipa_groups_def array.
+ *
+ * Return: 0 on success, error code otherwise
  */
-void kbase_ipa_set_dummy_time(ktime_t t);
-#endif /* MALI_UNIT_TEST */
+int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
+				       const struct kbase_ipa_group *ipa_groups_def,
+				       size_t ipa_group_size,
+				       kbase_ipa_get_active_cycles_callback get_active_cycles,
+				       s32 reference_voltage);
+
+/**
+ * kbase_ipa_vinstr_common_model_term() - terminate ipa power model
+ * @model: ipa power model to terminate
+ *
+ * This function performs all necessary steps to terminate ipa power model
+ * including clean up of resources allocated to hold model data.
+ */
+void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model);
 
 #endif /* _KBASE_IPA_VINSTR_COMMON_H_ */
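For orientation, the following is a hypothetical sketch of how a GPU-specific counter model wires into the new common helpers declared above; compare it with the per-model boilerplate removed from the g71 file further down. The model name, counter offset, coefficient and reference voltage are all invented; only the structures and helper signatures come from this header.

/* Hypothetical example only - not part of this patch. Assumes the usual
 * kbase headers are available to the translation unit. */
#include "mali_kbase_ipa_vinstr_common.h"

/* Assumed GPU_ACTIVE offset; for simplicity it is treated as absolute here
 * (the JM block sits at base 0). Real models go through a get_*_counter()
 * helper to compute the absolute offset. */
#define EXAMPLE_JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6)

static u32 example_get_active_cycles(
	struct kbase_ipa_model_vinstr_data *model_data)
{
	/* GPU_ACTIVE is 32-bit, so a coefficient of 1 cannot overflow. */
	return kbase_ipa_single_counter(model_data, 1, EXAMPLE_JM_GPU_ACTIVE);
}

static s64 example_gpu_active(struct kbase_ipa_model_vinstr_data *model_data,
			      s32 coeff, u32 counter_block_offset)
{
	return kbase_ipa_single_counter(model_data, coeff,
					counter_block_offset);
}

static const struct kbase_ipa_group example_ipa_groups_def[] = {
	{
		.name = "gpu_active",
		.default_value = 100000,	/* made-up coefficient */
		.op = example_gpu_active,
		.counter_block_offset = EXAMPLE_JM_GPU_ACTIVE,
	},
};

static int example_power_model_init(struct kbase_ipa_model *model)
{
	BUILD_BUG_ON(ARRAY_SIZE(example_ipa_groups_def) >
		     KBASE_IPA_MAX_GROUP_DEF_NUM);
	return kbase_ipa_vinstr_common_model_init(model,
			example_ipa_groups_def,
			ARRAY_SIZE(example_ipa_groups_def),
			example_get_active_cycles,
			800 /* reference_voltage in mV, assumed */);
}

struct kbase_ipa_model_ops example_ipa_model_ops = {
	.name = "mali-example-power-model",
	.init = example_power_model_init,
	.term = kbase_ipa_vinstr_common_model_term,
	.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff,
};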
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g71.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g71.c
deleted file mode 100644
index d07fb36..0000000
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g71.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-#include <linux/thermal.h>
-
-#include "mali_kbase_ipa_vinstr_common.h"
-#include "mali_kbase.h"
-#include "mali_kbase_ipa_debugfs.h"
-
-
-/* Performance counter blocks base offsets */
-#define JM_BASE             (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
-#define TILER_BASE          (1 * KBASE_IPA_NR_BYTES_PER_BLOCK)
-#define MEMSYS_BASE         (2 * KBASE_IPA_NR_BYTES_PER_BLOCK)
-#define SC0_BASE_ONE_MEMSYS (3 * KBASE_IPA_NR_BYTES_PER_BLOCK)
-#define SC0_BASE_TWO_MEMSYS (4 * KBASE_IPA_NR_BYTES_PER_BLOCK)
-
-/* JM counter block offsets */
-#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT *  6)
-
-/* Tiler counter block offsets */
-#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45)
-
-/* MEMSYS counter block offsets */
-#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25)
-
-/* SC counter block offsets */
-#define SC_FRAG_ACTIVE      (KBASE_IPA_NR_BYTES_PER_CNT *  4)
-#define SC_EXEC_CORE_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 26)
-#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28)
-#define SC_TEX_COORD_ISSUE  (KBASE_IPA_NR_BYTES_PER_CNT * 40)
-#define SC_VARY_SLOT_32     (KBASE_IPA_NR_BYTES_PER_CNT * 50)
-#define SC_VARY_SLOT_16     (KBASE_IPA_NR_BYTES_PER_CNT * 51)
-#define SC_BEATS_RD_LSC     (KBASE_IPA_NR_BYTES_PER_CNT * 56)
-#define SC_BEATS_WR_LSC     (KBASE_IPA_NR_BYTES_PER_CNT * 61)
-#define SC_BEATS_WR_TIB     (KBASE_IPA_NR_BYTES_PER_CNT * 62)
-
-/** Maximum number of cores for which a single Memory System block of performance counters is present. */
-#define KBASE_G71_SINGLE_MEMSYS_MAX_NUM_CORES ((u8)4)
-
-
-/**
- * get_jm_counter() - get performance counter offset inside the Job Manager block
- * @model_data:            pointer to GPU model data.
- * @counter_block_offset:  offset in bytes of the performance counter inside the Job Manager block.
- *
- * Return: Block offset in bytes of the required performance counter.
- */
-static u32 kbase_g71_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data,
-                                                u32 counter_block_offset)
-{
-	return JM_BASE + counter_block_offset;
-}
-
-/**
- * get_memsys_counter() - get peformance counter offset inside the Memory System block
- * @model_data:            pointer to GPU model data.
- * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Memory System block.
- *
- * Return: Block offset in bytes of the required performance counter.
- */
-static u32 kbase_g71_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data,
-                                                    u32 counter_block_offset)
-{
-	/* The base address of Memory System performance counters is always the same, although their number
-	 * may vary based on the number of cores. For the moment it's ok to return a constant.
-	 */
-	return MEMSYS_BASE + counter_block_offset;
-}
-
-/**
- * get_sc_counter() - get performance counter offset inside the Shader Cores block
- * @model_data:            pointer to GPU model data.
- * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Shader Cores block.
- *
- * Return: Block offset in bytes of the required performance counter.
- */
-static u32 kbase_g71_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
-                                                u32 counter_block_offset)
-{
-	const u32 sc_base = model_data->kbdev->gpu_props.num_cores <= KBASE_G71_SINGLE_MEMSYS_MAX_NUM_CORES ?
-	                    SC0_BASE_ONE_MEMSYS :
-	                    SC0_BASE_TWO_MEMSYS;
-
-	return sc_base + counter_block_offset;
-}
-
-/**
- * memsys_single_counter() - calculate energy for a single Memory System performance counter.
- * @model_data:            pointer to GPU model data.
- * @coeff:                 default value of coefficient for IPA group.
- * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
- *
- * Return: Energy estimation for a single Memory System performance counter.
- */
-static s64 kbase_g71_memsys_single_counter(
-    struct kbase_ipa_model_vinstr_data *model_data,
-    s32 coeff,
-    u32 counter_block_offset)
-{
-	return kbase_ipa_single_counter(model_data, coeff,
-	                                kbase_g71_power_model_get_memsys_counter(model_data, counter_block_offset));
-}
-
-/**
- * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores.
- * @model_data:            pointer to GPU model data.
- * @coeff:                 default value of coefficient for IPA group.
- * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
- *
- * Return: Energy estimation for a Shader Cores performance counter for all cores.
- */
-static s64 kbase_g71_sum_all_shader_cores(
-    struct kbase_ipa_model_vinstr_data *model_data,
-    s32 coeff,
-    u32 counter_block_offset)
-{
-	return kbase_ipa_sum_all_shader_cores(model_data, coeff,
-	                                      kbase_g71_power_model_get_sc_counter(model_data, counter_block_offset));
-}
-
-/**
- * jm_single_counter() - calculate energy for a single Job Manager performance counter.
- * @model_data:            pointer to GPU model data.
- * @coeff:                 default value of coefficient for IPA group.
- * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
- *
- * Return: Energy estimation for a single Job Manager performance counter.
- */
-static s64 kbase_g71_jm_single_counter(
-    struct kbase_ipa_model_vinstr_data *model_data,
-    s32 coeff,
-    u32 counter_block_offset)
-{
-	return kbase_ipa_single_counter(model_data, coeff,
-	                                kbase_g71_power_model_get_jm_counter(model_data, counter_block_offset));
-}
-
-/** Table of IPA group definitions.
- *
- * For each IPA group, this table defines a function to access the given performance block counter (or counters,
- * if the operation needs to be iterated on multiple blocks) and calculate energy estimation.
- */
-static const struct kbase_ipa_group ipa_groups_def[] = {
-	{
-		.name = "l2_access",
-		.default_value = 526300,
-		.op = kbase_g71_memsys_single_counter,
-		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
-	},
-	{
-		.name = "exec_instr_count",
-		.default_value = 301100,
-		.op = kbase_g71_sum_all_shader_cores,
-		.counter_block_offset = SC_EXEC_INSTR_COUNT,
-	},
-	{
-		.name = "tex_issue",
-		.default_value = 197400,
-		.op = kbase_g71_sum_all_shader_cores,
-		.counter_block_offset = SC_TEX_COORD_ISSUE,
-	},
-	{
-		.name = "tile_wb",
-		.default_value = -156400,
-		.op = kbase_g71_sum_all_shader_cores,
-		.counter_block_offset = SC_BEATS_WR_TIB,
-	},
-	{
-		.name = "gpu_active",
-		.default_value = 115800,
-		.op = kbase_g71_jm_single_counter,
-		.counter_block_offset = JM_GPU_ACTIVE,
-	},
-};
-
-static int kbase_g71_power_model_init(struct kbase_ipa_model *model)
-{
-	int i, err = 0;
-	struct kbase_ipa_model_vinstr_data *model_data;
-
-	model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
-	if (!model_data)
-		return -ENOMEM;
-
-	model_data->kbdev = model->kbdev;
-	model_data->groups_def = ipa_groups_def;
-	BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def) > KBASE_IPA_MAX_GROUP_DEF_NUM);
-	model_data->groups_def_num = ARRAY_SIZE(ipa_groups_def);
-
-	model->model_data = (void *) model_data;
-
-	for (i = 0; i < ARRAY_SIZE(ipa_groups_def); ++i) {
-		const struct kbase_ipa_group *group = &ipa_groups_def[i];
-
-		model_data->group_values[i] = group->default_value;
-		err = kbase_ipa_model_add_param_s32(model, group->name,
-					&model_data->group_values[i],
-					1, false);
-		if (err)
-			goto exit;
-	}
-
-	model_data->scaling_factor = 5;
-	err = kbase_ipa_model_add_param_s32(model, "scale",
-					    &model_data->scaling_factor,
-					    1, false);
-	if (err)
-		goto exit;
-
-	err = kbase_ipa_attach_vinstr(model_data);
-
-exit:
-	if (err) {
-		kbase_ipa_model_param_free_all(model);
-		kfree(model_data);
-	}
-	return err;
-}
-
-static void kbase_g71_power_model_term(struct kbase_ipa_model *model)
-{
-	struct kbase_ipa_model_vinstr_data *model_data =
-			(struct kbase_ipa_model_vinstr_data *)model->model_data;
-
-	kbase_ipa_detach_vinstr(model_data);
-	kfree(model_data);
-}
-
-
-struct kbase_ipa_model_ops kbase_g71_ipa_model_ops = {
-		.name = "mali-g71-power-model",
-		.init = kbase_g71_power_model_init,
-		.term = kbase_g71_power_model_term,
-		.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff,
-		.do_utilization_scaling_in_framework = false,
-};
-KBASE_EXPORT_TEST_API(kbase_g71_ipa_model_ops);
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c
new file mode 100644
index 0000000..8366033
--- /dev/null
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c
@@ -0,0 +1,340 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include <linux/thermal.h>
+
+#include "mali_kbase_ipa_vinstr_common.h"
+#include "mali_kbase.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+
+/* Performance counter blocks base offsets */
+#define JM_BASE             (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define TILER_BASE          (1 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define MEMSYS_BASE         (2 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+
+/* JM counter block offsets */
+#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT *  6)
+
+/* Tiler counter block offsets */
+#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45)
+
+/* MEMSYS counter block offsets */
+#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25)
+
+/* SC counter block offsets */
+#define SC_FRAG_ACTIVE             (KBASE_IPA_NR_BYTES_PER_CNT *  4)
+#define SC_EXEC_CORE_ACTIVE        (KBASE_IPA_NR_BYTES_PER_CNT * 26)
+#define SC_EXEC_INSTR_COUNT        (KBASE_IPA_NR_BYTES_PER_CNT * 28)
+#define SC_TEX_COORD_ISSUE         (KBASE_IPA_NR_BYTES_PER_CNT * 40)
+#define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42)
+#define SC_VARY_INSTR              (KBASE_IPA_NR_BYTES_PER_CNT * 49)
+#define SC_VARY_SLOT_32            (KBASE_IPA_NR_BYTES_PER_CNT * 50)
+#define SC_VARY_SLOT_16            (KBASE_IPA_NR_BYTES_PER_CNT * 51)
+#define SC_BEATS_RD_LSC            (KBASE_IPA_NR_BYTES_PER_CNT * 56)
+#define SC_BEATS_WR_LSC            (KBASE_IPA_NR_BYTES_PER_CNT * 61)
+#define SC_BEATS_WR_TIB            (KBASE_IPA_NR_BYTES_PER_CNT * 62)
+
+/**
+ * get_jm_counter() - get performance counter offset inside the Job Manager block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the Job Manager block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data,
+						u32 counter_block_offset)
+{
+	return JM_BASE + counter_block_offset;
+}
+
+/**
+ * get_memsys_counter() - get performance counter offset inside the Memory System block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Memory System block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data,
+						    u32 counter_block_offset)
+{
+	/* The base address of Memory System performance counters is always the same, although their number
+	 * may vary based on the number of cores. For the moment it's ok to return a constant.
+	 */
+	return MEMSYS_BASE + counter_block_offset;
+}
+
+/**
+ * get_sc_counter() - get performance counter offset inside the Shader Cores block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Shader Cores block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
+						u32 counter_block_offset)
+{
+	const u32 sc_base = MEMSYS_BASE +
+		(model_data->kbdev->gpu_props.props.l2_props.num_l2_slices *
+		 KBASE_IPA_NR_BYTES_PER_BLOCK);
+
+	return sc_base + counter_block_offset;
+}
+
+/**
+ * kbase_g7x_sum_all_memsys_blocks() - calculate energy for a Memory System counter across all blocks.
+ * @model_data:   pointer to GPU model data.
+ * @coeff:        default value of coefficient for IPA group.
+ * @offset:       offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for the counter, summed across all Memory System blocks.
+ */
+static s64 kbase_g7x_sum_all_memsys_blocks(
+		struct kbase_ipa_model_vinstr_data *model_data,
+		s32 coeff,
+		u32 offset)
+{
+	u32 counter;
+
+	counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset);
+	return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter);
+}
+
+/**
+ * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a Shader Cores performance counter for all cores.
+ */
+static s64 kbase_g7x_sum_all_shader_cores(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff,
+	u32 counter_block_offset)
+{
+	u32 counter;
+
+	counter = kbase_g7x_power_model_get_sc_counter(model_data,
+						       counter_block_offset);
+	return kbase_ipa_sum_all_shader_cores(model_data, coeff, counter);
+}
+
+/**
+ * jm_single_counter() - calculate energy for a single Job Manager performance counter.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a single Job Manager performance counter.
+ */
+static s64 kbase_g7x_jm_single_counter(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff,
+	u32 counter_block_offset)
+{
+	u32 counter;
+
+	counter = kbase_g7x_power_model_get_jm_counter(model_data,
+						     counter_block_offset);
+	return kbase_ipa_single_counter(model_data, coeff, counter);
+}
+
+/**
+ * get_active_cycles() - return the GPU_ACTIVE counter
+ * @model_data:            pointer to GPU model data.
+ *
+ * Return: the number of cycles the GPU was active during the counter sampling
+ * period.
+ */
+static u32 kbase_g7x_get_active_cycles(
+	struct kbase_ipa_model_vinstr_data *model_data)
+{
+	u32 counter = kbase_g7x_power_model_get_jm_counter(model_data, JM_GPU_ACTIVE);
+
+	/* Counters are only 32-bit, so we can safely multiply by 1 then cast
+	 * the 64-bit result back to a u32.
+	 */
+	return kbase_ipa_single_counter(model_data, 1, counter);
+}
+
+/* Tables of IPA group definitions.
+ *
+ * For each IPA group, these tables define the default coefficient and a function used to read the relevant
+ * performance block counter (or counters, where the operation has to be iterated over multiple blocks) and to
+ * calculate the energy estimate. An illustrative sketch of how such a table is consumed follows the Mali-G71
+ * table below.
+ */
+
+static const struct kbase_ipa_group ipa_groups_def_g71[] = {
+	{
+		.name = "l2_access",
+		.default_value = 526300,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 301100,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "tex_issue",
+		.default_value = 197400,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_COORD_ISSUE,
+	},
+	{
+		.name = "tile_wb",
+		.default_value = -156400,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_BEATS_WR_TIB,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 115800,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
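
A minimal sketch of how one of these tables would be consumed (illustration only, not part of the patch; the real evaluation is performed by kbase_ipa_vinstr_dynamic_coeff() through each group's .op callback, and tunable per-group coefficients may be used in place of default_value):

	/* Illustrative sketch only -- not part of the patch. */
	static s64 example_sum_ipa_groups(
		struct kbase_ipa_model_vinstr_data *model_data,
		const struct kbase_ipa_group *groups, size_t num_groups)
	{
		s64 total = 0;
		size_t i;

		/* Each group contributes coefficient * counter reading; the
		 * .op callback hides whether one block or all blocks are summed.
		 */
		for (i = 0; i < num_groups; i++)
			total += groups[i].op(model_data,
					      groups[i].default_value,
					      groups[i].counter_block_offset);

		return total;
	}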
+
+static const struct kbase_ipa_group ipa_groups_def_g72[] = {
+	{
+		.name = "l2_access",
+		.default_value = 393000,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 227000,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "tex_issue",
+		.default_value = 181900,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_COORD_ISSUE,
+	},
+	{
+		.name = "tile_wb",
+		.default_value = -120200,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_BEATS_WR_TIB,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 133100,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_tnox[] = {
+	{
+		.name = "gpu_active",
+		.default_value = 122000,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 488900,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "vary_instr",
+		.default_value = 212100,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_VARY_INSTR,
+	},
+	{
+		.name = "tex_tfch_num_operations",
+		.default_value = 288000,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS,
+	},
+	{
+		.name = "l2_access",
+		.default_value = 378100,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_tgox_r1[] = {
+	{
+		.name = "gpu_active",
+		.default_value = 224200,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 384700,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "vary_instr",
+		.default_value = 271900,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_VARY_INSTR,
+	},
+	{
+		.name = "tex_tfch_num_operations",
+		.default_value = 477700,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS,
+	},
+	{
+		.name = "l2_access",
+		.default_value = 551400,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+};
+
+#define STANDARD_POWER_MODEL(gpu, reference_voltage) \
+	static int kbase_ ## gpu ## _power_model_init(\
+			struct kbase_ipa_model *model) \
+	{ \
+		BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def_ ## gpu) > \
+				KBASE_IPA_MAX_GROUP_DEF_NUM); \
+		return kbase_ipa_vinstr_common_model_init(model, \
+				ipa_groups_def_ ## gpu, \
+				ARRAY_SIZE(ipa_groups_def_ ## gpu), \
+				kbase_g7x_get_active_cycles, \
+				(reference_voltage)); \
+	} \
+	struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
+		.name = "mali-" #gpu "-power-model", \
+		.init = kbase_ ## gpu ## _power_model_init, \
+		.term = kbase_ipa_vinstr_common_model_term, \
+		.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
+	}; \
+	KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
+
+STANDARD_POWER_MODEL(g71, 800);
+STANDARD_POWER_MODEL(g72, 800);
+STANDARD_POWER_MODEL(tnox, 800);
+STANDARD_POWER_MODEL(tgox_r1, 1000);
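
For reference, writing out one invocation of the macro above gives the following (this is just its mechanical expansion, reformatted for readability, not additional patch content):

	static int kbase_g71_power_model_init(struct kbase_ipa_model *model)
	{
		BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def_g71) >
				KBASE_IPA_MAX_GROUP_DEF_NUM);
		return kbase_ipa_vinstr_common_model_init(model,
				ipa_groups_def_g71,
				ARRAY_SIZE(ipa_groups_def_g71),
				kbase_g7x_get_active_cycles,
				(800));
	}
	struct kbase_ipa_model_ops kbase_g71_ipa_model_ops = {
		.name = "mali-g71-power-model",
		.init = kbase_g71_power_model_init,
		.term = kbase_ipa_vinstr_common_model_term,
		.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff,
	};
	KBASE_EXPORT_TEST_API(kbase_g71_ipa_model_ops);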
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
index e0eebd8..10da0c5 100644
--- a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -31,7 +31,6 @@
 enum base_hw_feature {
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
-	BASE_HW_FEATURE_33BIT_VA,
 	BASE_HW_FEATURE_XAFFINITY,
 	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
 	BASE_HW_FEATURE_MRT,
@@ -85,7 +84,6 @@
 };
 
 static const enum base_hw_feature base_hw_features_t72x[] = {
-	BASE_HW_FEATURE_33BIT_VA,
 	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
 	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
 	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
@@ -139,7 +137,6 @@
 };
 
 static const enum base_hw_feature base_hw_features_t83x[] = {
-	BASE_HW_FEATURE_33BIT_VA,
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
 	BASE_HW_FEATURE_XAFFINITY,
@@ -162,7 +159,6 @@
 };
 
 static const enum base_hw_feature base_hw_features_t82x[] = {
-	BASE_HW_FEATURE_33BIT_VA,
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
 	BASE_HW_FEATURE_XAFFINITY,
@@ -238,7 +234,6 @@
 };
 
 static const enum base_hw_feature base_hw_features_tSIx[] = {
-	BASE_HW_FEATURE_33BIT_VA,
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
 	BASE_HW_FEATURE_XAFFINITY,
@@ -266,7 +261,6 @@
 };
 
 static const enum base_hw_feature base_hw_features_tDVx[] = {
-	BASE_HW_FEATURE_33BIT_VA,
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
 	BASE_HW_FEATURE_XAFFINITY,
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
index 7b70e7a..19ffd69 100644
--- a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -97,6 +97,7 @@
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
 	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
 	BASE_HW_ISSUE_T720_1386,
 	BASE_HW_ISSUE_T76X_26,
 	BASE_HW_ISSUE_T76X_1909,
@@ -111,6 +112,7 @@
 	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_T76X_3966,
 	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
 	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_TMIX_7940,
 	BASE_HW_ISSUE_TMIX_8042,
@@ -123,6 +125,8 @@
 	GPUCORE_1619,
 	BASE_HW_ISSUE_TSIX_1116,
 	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TNOX_1194,
+	BASE_HW_ISSUE_TGOX_R1_1234,
 	BASE_HW_ISSUE_END
 };
 
@@ -189,6 +193,7 @@
 	BASE_HW_ISSUE_11035,
 	BASE_HW_ISSUE_11051,
 	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_3964,
 	GPUCORE_1619,
@@ -230,6 +235,7 @@
 	BASE_HW_ISSUE_11035,
 	BASE_HW_ISSUE_11051,
 	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_TMIX_8438,
@@ -267,6 +273,7 @@
 	BASE_HW_ISSUE_11035,
 	BASE_HW_ISSUE_11051,
 	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3964,
@@ -302,6 +309,7 @@
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
 	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_TMIX_8438,
@@ -326,6 +334,7 @@
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
 	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3964,
@@ -349,6 +358,7 @@
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
 	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_TMIX_8438,
@@ -527,6 +537,7 @@
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
 	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3964,
@@ -547,6 +558,7 @@
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
 	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
 	BASE_HW_ISSUE_T720_1386,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
@@ -568,6 +580,7 @@
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
 	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
 	BASE_HW_ISSUE_T720_1386,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
@@ -834,6 +847,7 @@
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
 	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
 	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_TMIX_8438,
 	BASE_HW_ISSUE_END
@@ -854,6 +868,7 @@
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
 	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
 	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_TMIX_8438,
 	BASE_HW_ISSUE_END
@@ -869,6 +884,7 @@
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
 	BASE_HW_ISSUE_TMIX_7891,
 	GPUCORE_1619,
 	BASE_HW_ISSUE_TMIX_8438,
@@ -892,6 +908,7 @@
 	BASE_HW_ISSUE_T76X_3960,
 	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
 	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_TMIX_8438,
 	BASE_HW_ISSUE_END
@@ -913,6 +930,7 @@
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
 	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
 	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_TMIX_8438,
 	BASE_HW_ISSUE_END
@@ -933,6 +951,7 @@
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
 	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
 	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_TMIX_8438,
 	BASE_HW_ISSUE_END
@@ -947,6 +966,7 @@
 	BASE_HW_ISSUE_T76X_3700,
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
 	BASE_HW_ISSUE_TMIX_7891,
 	GPUCORE_1619,
 	BASE_HW_ISSUE_END
@@ -1123,6 +1143,7 @@
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TNOX_1194,
 	BASE_HW_ISSUE_END
 };
 
@@ -1138,6 +1159,15 @@
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TNOX_1194,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TGOX_R1_1234,
 	BASE_HW_ISSUE_END
 };
 
diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h
index e6b568f..cc44ff2 100644
--- a/drivers/gpu/arm/midgard/mali_base_kernel.h
+++ b/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -127,18 +127,19 @@
  */
 #define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
 
-	/* BASE_MEM_HINT flags have been removed, but their values are reserved
-	 * for backwards compatibility with older user-space drivers. The values
-	 * can be re-used once support for r5p0 user-space drivers is removed,
-	 * presumably in r7p0.
-	 *
-	 * RESERVED: (1U << 5)
-	 * RESERVED: (1U << 6)
-	 * RESERVED: (1U << 7)
-	 * RESERVED: (1U << 8)
-	 */
-#define BASE_MEM_RESERVED_BIT_5 ((base_mem_alloc_flags)1 << 5)
-#define BASE_MEM_RESERVED_BIT_6 ((base_mem_alloc_flags)1 << 6)
+/* Will be permanently mapped in kernel space.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASE_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
+
+/* The allocation will completely reside within the same 4GB chunk in the GPU
+ * virtual space.
+ * Since this flag is primarily required only for the TLS memory which will
+ * not be used to contain executable code and also not used for Tiler heap,
+ * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
+ */
+#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
+
 #define BASE_MEM_RESERVED_BIT_7 ((base_mem_alloc_flags)1 << 7)
 #define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
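
The constraint described above for BASE_MEM_GPU_VA_SAME_4GB_PAGE can be pictured with a small check like the following (a hedged sketch only; example_same_4gb_flags_valid() is a hypothetical helper, and the real validation is performed elsewhere in kbase, outside this hunk):

	static inline bool example_same_4gb_flags_valid(base_mem_alloc_flags flags)
	{
		if (!(flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE))
			return true;
		/* Not allowed together with executable or tiler-aligned-top memory */
		return !(flags & (BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP));
	}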
 
@@ -192,6 +193,7 @@
  * Do not remove, use the next unreserved bit for new flags
  */
 #define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
+#define BASE_MEM_MAYBE_RESERVED_BIT_19 BASE_MEM_RESERVED_BIT_19
 
 /**
  * Memory starting from the end of the initial commit is aligned to 'extent'
@@ -200,11 +202,20 @@
  */
 #define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
 
+/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu mode.
+ * Some components within the GPU might only be able to access memory that is
+ * GPU cacheable. Refer to the specific GPU implementation for more details.
+ * The 3 shareability flags will be ignored for GPU uncached memory.
+ * If used while importing USER_BUFFER type memory, then the import will fail
+ * if the memory is not aligned to GPU and CPU cache line width.
+ */
+#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
+
 /* Number of bits used as flags for base memory management
  *
  * Must be kept in sync with the base_mem_alloc_flags flags
  */
-#define BASE_MEM_FLAGS_NR_BITS 21
+#define BASE_MEM_FLAGS_NR_BITS 22
 
 /* A mask for all output bits, excluding IN/OUT bits.
  */
@@ -226,9 +237,13 @@
 /* A mask of all currently reserved flags
  */
 #define BASE_MEM_FLAGS_RESERVED \
-	(BASE_MEM_RESERVED_BIT_5 | BASE_MEM_RESERVED_BIT_6 | \
-		BASE_MEM_RESERVED_BIT_7 | BASE_MEM_RESERVED_BIT_8 | \
-		BASE_MEM_RESERVED_BIT_19)
+	(BASE_MEM_RESERVED_BIT_7 | BASE_MEM_RESERVED_BIT_8 | \
+		BASE_MEM_MAYBE_RESERVED_BIT_19)
+
+/* A mask of all the flags which are only valid for allocations within kbase,
+ * and may not be passed from user space.
+ */
+#define BASE_MEM_FLAGS_KERNEL_ONLY (BASE_MEM_PERMANENT_KERNEL_MAPPING)
 
 /* A mask of all the flags that can be returned via the base_mem_get_flags()
  * interface.
@@ -236,13 +251,13 @@
 #define BASE_MEM_FLAGS_QUERYABLE \
 	(BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \
 		BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \
-		BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED))
+		BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \
+		BASE_MEM_FLAGS_KERNEL_ONLY))
 
 /**
  * enum base_mem_import_type - Memory types supported by @a base_mem_import
  *
  * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
- * @BASE_MEM_IMPORT_TYPE_UMP: UMP import. Handle type is ump_secure_id.
  * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
  * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
  * base_mem_import_user_buffer
@@ -257,7 +272,9 @@
  */
 typedef enum base_mem_import_type {
 	BASE_MEM_IMPORT_TYPE_INVALID = 0,
-	BASE_MEM_IMPORT_TYPE_UMP = 1,
+	/**
+	 * Import type with value 1 is deprecated.
+	 */
 	BASE_MEM_IMPORT_TYPE_UMM = 2,
 	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
 } base_mem_import_type;
@@ -303,13 +320,15 @@
 #define BASE_MEM_TRACE_BUFFER_HANDLE           (2ull  << 12)
 #define BASE_MEM_MAP_TRACKING_HANDLE           (3ull  << 12)
 #define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE     (4ull  << 12)
-/* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
+/* reserved handles ..-48<<PAGE_SHIFT> for future special handles */
 #define BASE_MEM_COOKIE_BASE                   (64ul  << 12)
 #define BASE_MEM_FIRST_FREE_ADDRESS            ((BITS_PER_LONG << 12) + \
 						BASE_MEM_COOKIE_BASE)
 
 /* Mask to detect 4GB boundary alignment */
 #define BASE_MEM_MASK_4GB  0xfffff000UL
+/* Mask to detect 4GB boundary (in page units) alignment */
+#define BASE_MEM_PFN_MASK_4GB  (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT)
 
 /**
  * Limit on the 'extent' parameter for an allocation with the
@@ -317,12 +336,17 @@
  *
  * This is the same as the maximum limit for a Buffer Descriptor's chunk size
  */
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2 \
+		(21u - (LOCAL_PAGE_SHIFT))
 #define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES \
-		((2ull * 1024ull * 1024ull) >> (LOCAL_PAGE_SHIFT))
+		(1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2))
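
As a worked example (illustration only, assuming 4 KiB pages, i.e. LOCAL_PAGE_SHIFT == 12):

	/* BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2 = 21 - 12 = 9   */
	/* BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES      = 1 << 9 = 512  */
	/* i.e. 512 * 4 KiB = 2 MiB, matching the previous literal        */
	/* ((2ull * 1024ull * 1024ull) >> LOCAL_PAGE_SHIFT).              */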
 
 /* Bit mask of cookies used for for memory allocation setup */
 #define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
 
+/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */
+#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */
+
 
 /**
  * @brief Result codes of changing the size of the backing store allocated to a tmem region
@@ -434,6 +458,13 @@
 };
 
 /**
+ * Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
+ * initial commit is aligned to 'extent' pages, where 'extent' must be a power
+ * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ */
+#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP  (1 << 0)
+
+/**
  * struct base_jit_alloc_info - Structure which describes a JIT allocation
  *                              request.
  * @gpu_alloc_addr:             The GPU virtual address to write the JIT
@@ -446,6 +477,18 @@
  * @id:                         Unique ID provided by the caller, this is used
  *                              to pair allocation and free requests.
  *                              Zero is not a valid value.
+ * @bin_id:                     The JIT allocation bin, used in conjunction with
+ *                              @max_allocations to limit the number of each
+ *                              type of JIT allocation.
+ * @max_allocations:            The maximum number of allocations allowed within
+ *                              the bin specified by @bin_id. Should be the same
+ *                              for all JIT allocations within the same bin.
+ * @flags:                      flags specifying the special requirements for
+ *                              the JIT allocation.
+ * @padding:                    Expansion space - should be initialised to zero
+ * @usage_id:                   A hint about which allocation should be reused.
+ *                              The kernel should attempt to use a previous
+ *                              allocation with the same usage_id
  */
 struct base_jit_alloc_info {
 	u64 gpu_alloc_addr;
@@ -453,6 +496,11 @@
 	u64 commit_pages;
 	u64 extent;
 	u8 id;
+	u8 bin_id;
+	u8 max_allocations;
+	u8 flags;
+	u8 padding[2];
+	u16 usage_id;
 };
 
 /**
@@ -616,9 +664,10 @@
 /**
  * SW only requirement: Just In Time allocation
  *
- * This job requests a JIT allocation based on the request in the
- * @base_jit_alloc_info structure which is passed via the jc element of
- * the atom.
+ * This job requests one or more JIT allocations through a list
+ * of @base_jit_alloc_info structures which is passed via the jc element of
+ * the atom. The number of @base_jit_alloc_info structures present in the
+ * list is passed via the nr_extres element of the atom.
  *
  * It should be noted that the id entry in @base_jit_alloc_info must not
  * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
@@ -632,9 +681,9 @@
 /**
  * SW only requirement: Just In Time free
  *
- * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC
- * to be freed. The ID of the JIT allocation is passed via the jc element of
- * the atom.
+ * This job requests that one or more JIT allocations created by
+ * @BASE_JD_REQ_SOFT_JIT_ALLOC be freed. The list of IDs of the JIT
+ * allocations to free is passed via the jc element of the atom.
  *
  * The job will complete immediately.
  */
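
A hedged sketch of the user-side setup this implies (illustrative only; the array contents and the actual submission path are omitted, and the field names come from base_jd_atom_v2 as declared in this header):

	struct base_jit_alloc_info infos[2];    /* filled in by the client */
	struct base_jd_atom_v2 atom = { 0 };

	atom.core_req  = BASE_JD_REQ_SOFT_JIT_ALLOC;
	atom.jc        = (u64)(uintptr_t)infos; /* list of base_jit_alloc_info */
	atom.nr_extres = 2;                     /* number of entries in the list */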
@@ -750,41 +799,20 @@
 	(core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
 
 /**
- * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
- * handles retaining cores for power management and affinity management.
+ * enum kbase_atom_coreref_state - States to model state machine processed by
+ * kbasep_js_job_check_ref_cores(), which handles retaining cores for power
+ * management.
  *
- * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
- * where lots of atoms could be submitted before powerup, and each has an
- * affinity chosen that causes other atoms to have an affinity
- * violation. Whilst the affinity was not causing violations at the time it
- * was chosen, it could cause violations thereafter. For example, 1000 jobs
- * could have had their affinity chosen during the powerup time, so any of
- * those 1000 jobs could cause an affinity violation later on.
- *
- * The attack would otherwise occur because other atoms/contexts have to wait for:
- * -# the currently running atoms (which are causing the violation) to
- * finish
- * -# and, the atoms that had their affinity chosen during powerup to
- * finish. These are run preferentially because they don't cause a
- * violation, but instead continue to cause the violation in others.
- * -# or, the attacker is scheduled out (which might not happen for just 2
- * contexts)
- *
- * By re-choosing the affinity (which is designed to avoid violations at the
- * time it's chosen), we break condition (2) of the wait, which minimizes the
- * problem to just waiting for current jobs to finish (which can be bounded if
- * the Job Scheduling Policy has a timer).
+ * @KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: Starting state: Cores must be
+ * requested.
+ * @KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: Cores requested, but
+ * waiting for them to be powered
+ * @KBASE_ATOM_COREREF_STATE_READY: Cores are powered, atom can be submitted to
+ * HW
  */
 enum kbase_atom_coreref_state {
-	/** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */
 	KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
-	/** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
 	KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
-	/** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
-	KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
-	/** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
-	KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
-	/** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
 	KBASE_ATOM_COREREF_STATE_READY
 };
 
@@ -862,7 +890,7 @@
 	u64 jc;			    /**< job-chain GPU address */
 	struct base_jd_udata udata;		    /**< user data */
 	u64 extres_list;	    /**< list of external resources */
-	u16 nr_extres;			    /**< nr of external resources */
+	u16 nr_extres;			    /**< nr of external resources or JIT allocations */
 	u16 compat_core_req;	            /**< core requirements which correspond to the legacy support for UK 10.2 */
 	struct base_dependency pre_dep[2];  /**< pre-dependencies, one need to use SETTER function to assign this field,
 	this is done in order to reduce possibility of improper assigment of a dependency field */
@@ -1412,6 +1440,11 @@
 	 * client will not be expecting to allocate anywhere near this value.
 	 */
 	u64 gpu_available_memory_size;
+
+	/**
+	 * The number of execution engines.
+	 */
+	u8 num_exec_engines;
 };
 
 /**
@@ -1442,7 +1475,10 @@
 	u8  max_task_queue;         /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
 	u8  max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
 	u8  impl_tech;              /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
-	u8  padding[7];
+	u8  padding[3];
+	u32 tls_alloc;              /* Number of threads per core that TLS must
+				     * be allocated for
+				     */
 };
 
 /**
@@ -1524,7 +1560,7 @@
 	u64 stack_present;
 
 	u32 l2_features;
-	u32 suspend_size; /* API 8.2+ */
+	u32 core_features;
 	u32 mem_features;
 	u32 mmu_features;
 
@@ -1547,6 +1583,8 @@
 	 * available modes as exposed in the coherency_features register.
 	 */
 	u32 coherency_mode;
+
+	u32 thread_tls_alloc;
 };
 
 /**
@@ -1582,39 +1620,44 @@
  */
 
 /**
- * \enum base_context_create_flags
- *
  * Flags to pass to ::base_context_init.
  * Flags can be ORed together to enable multiple things.
  *
  * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
  * not collide with them.
  */
-enum base_context_create_flags {
-	/** No flags set */
-	BASE_CONTEXT_CREATE_FLAG_NONE = 0,
+typedef u32 base_context_create_flags;
 
-	/** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */
-	BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0),
+/** No flags set */
+#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
 
-	/** Base context is a 'System Monitor' context for Hardware counters.
-	 *
-	 * One important side effect of this is that job submission is disabled. */
-	BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1)
-};
+/** Base context is embedded in a cctx object (flag used for CINSTR
+ * software counter macros)
+ */
+#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
+
+/** Base context is a 'System Monitor' context for Hardware counters.
+ *
+ * One important side effect of this is that job submission is disabled.
+ */
+#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
+	((base_context_create_flags)1 << 1)
+
 
 /**
- * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init()
+ * Bitpattern describing the ::base_context_create_flags that can be
+ * passed to base_context_init()
  */
 #define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
-	(((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
-	  ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
+	(BASE_CONTEXT_CCTX_EMBEDDED | \
+	 BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
 
 /**
- * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel
+ * Bitpattern describing the ::base_context_create_flags that can be
+ * passed to the kernel
  */
 #define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
-	((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
+	BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED
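
A minimal sketch of how a kernel-side caller would be expected to use this mask (illustrative only; example_check_create_flags() is a hypothetical helper):

	static inline int example_check_create_flags(base_context_create_flags flags)
	{
		if (flags & ~BASE_CONTEXT_CREATE_KERNEL_FLAGS)
			return -EINVAL;	/* unknown or user-only flag */
		return 0;
	}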
 
 /*
  * Private flags used on the base context
@@ -1739,4 +1782,5 @@
 #define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
 		BASE_TLSTREAM_JOB_DUMPING_ENABLED)
 
+
 #endif				/* _BASE_KERNEL_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h
index 7174ef2..dc0d5f1 100644
--- a/drivers/gpu/arm/midgard/mali_kbase.h
+++ b/drivers/gpu/arm/midgard/mali_kbase.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -59,10 +59,9 @@
 #include "mali_kbase_context.h"
 #include "mali_kbase_strings.h"
 #include "mali_kbase_mem_lowlevel.h"
-#include "mali_kbase_trace_timeline.h"
 #include "mali_kbase_js.h"
-#include "mali_kbase_mem.h"
 #include "mali_kbase_utility.h"
+#include "mali_kbase_mem.h"
 #include "mali_kbase_gpu_memory_debugfs.h"
 #include "mali_kbase_mem_profile_debugfs.h"
 #include "mali_kbase_debug_job_fault.h"
@@ -77,6 +76,7 @@
 #include <trace/events/gpu.h>
 #endif
 
+
 #ifndef u64_to_user_ptr
 /* Introduced in Linux v4.6 */
 #define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x)
@@ -111,10 +111,6 @@
 
 void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
 
-struct kbase_context *
-kbase_create_context(struct kbase_device *kbdev, bool is_compat);
-void kbase_destroy_context(struct kbase_context *kctx);
-
 
 /**
  * kbase_get_unmapped_area() - get an address range which is currently
@@ -243,6 +239,44 @@
 void kbase_event_cleanup(struct kbase_context *kctx);
 void kbase_event_wakeup(struct kbase_context *kctx);
 
+/**
+ * kbasep_jit_alloc_validate() - Validate the JIT allocation info.
+ *
+ * @kctx:	Pointer to the kbase context within which the JIT
+ *		allocation is to be validated.
+ * @info:	Pointer to the struct base_jit_alloc_info
+ *		which is to be validated.
+ *
+ * Return: 0 if the JIT allocation is valid; negative error code otherwise.
+ */
+int kbasep_jit_alloc_validate(struct kbase_context *kctx,
+					struct base_jit_alloc_info *info);
+/**
+ * kbase_mem_copy_from_extres_page() - Copy pages from external resources.
+ *
+ * @kctx:		kbase context within which the copying is to take place.
+ * @extres_page:	Pointer to the page which corresponds to the external
+ *			resource from which the copying will take place.
+ * @pages:		Pointer to the pages to which the content is to be
+ *			copied from the provided external resource.
+ * @nr_pages:		Number of pages to copy.
+ * @target_page_nr:	Pointer to the number of the target page currently
+ *			being copied to, updated as the copy proceeds.
+ * @offset:		Offset into the target pages at which the copying
+ *			is to be performed.
+ * @to_copy:		Pointer to the size of the chunk to be copied, in bytes.
+ */
+void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+		void *extres_page, struct page **pages, unsigned int nr_pages,
+		unsigned int *target_page_nr, size_t offset, size_t *to_copy);
+/**
+ * kbase_mem_copy_from_extres() - Copy from external resources.
+ *
+ * @kctx:	kbase context within which the copying is to take place.
+ * @buf_data:	Pointer to the information about external resources:
+ *		pages pertaining to the external resource, number of
+ *		pages to copy.
+ */
+int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+		struct kbase_debug_copy_buffer *buf_data);
 int kbase_process_soft_job(struct kbase_jd_atom *katom);
 int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
 void kbase_finish_soft_job(struct kbase_jd_atom *katom);
@@ -258,15 +292,9 @@
 
 bool kbase_replay_process(struct kbase_jd_atom *katom);
 
-void kbasep_soft_job_timeout_worker(unsigned long data);
+void kbasep_soft_job_timeout_worker(struct timer_list *timer);
 void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
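
With the timer_list-based prototype, the worker is expected to recover its context with container_of(); a minimal sketch, assuming only that soft_job_timeout is the kbase_context member set up via kbase_timer_setup() elsewhere in this patch (the body is omitted):

	void kbasep_soft_job_timeout_worker(struct timer_list *timer)
	{
		struct kbase_context *kctx = container_of(timer,
				struct kbase_context, soft_job_timeout);

		/* ... handle timed-out soft jobs for kctx ... */
	}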
 
-/* api used internally for register access. Contains validation and tracing */
-void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
-int kbase_device_trace_buffer_install(
-		struct kbase_context *kctx, u32 *tb, size_t size);
-void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
-
 void kbasep_as_do_poke(struct work_struct *work);
 
 /** Returns the name associated with a Mali exception code
@@ -297,6 +325,29 @@
 }
 
 /**
+ * kbase_pm_is_active - Determine whether the GPU is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This takes into account the following
+ *
+ * - whether there is an active context reference
+ *
+ * - whether any of the shader cores or the tiler are needed
+ *
+ * It should generally be preferred over checking just
+ * kbdev->pm.active_count on its own, because some code paths drop their
+ * reference on it whilst still having the shader cores/tiler in use.
+ *
+ * Return: true if the GPU is active, false otherwise
+ */
+static inline bool kbase_pm_is_active(struct kbase_device *kbdev)
+{
+	return (kbdev->pm.active_count > 0 || kbdev->shader_needed_cnt ||
+			kbdev->tiler_needed_cnt);
+}
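
A usage sketch (illustrative only; start_gpu_poweroff_sequence() is a hypothetical callee):

	/* Prefer the helper over open-coding the active_count check: */
	if (!kbase_pm_is_active(kbdev))
		start_gpu_poweroff_sequence(kbdev);	/* hypothetical */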
+
+/**
  * Return the atom's ID, as was originally supplied by userspace in
  * base_jd_atom_v2::atom_number
  */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
index e0e40a9..8d71926 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2015,2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2015,2017-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,11 +23,6 @@
 #include <mali_kbase.h>
 #include <mali_kbase_10969_workaround.h>
 
-/* This function is used to solve an HW issue with single iterator GPUs.
- * If a fragment job is soft-stopped on the edge of its bounding box, can happen that the
- * restart index is out of bounds and the rerun causes a tile range fault. If this happens
- * we try to clamp the restart index to a correct value and rerun the job.
- */
 /* Mask of X and Y coordinates for the coordinates words in the descriptors*/
 #define X_COORDINATE_MASK 0x00000FFF
 #define Y_COORDINATE_MASK 0x0FFF0000
@@ -79,7 +74,7 @@
 
 	page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
 
-	p = phys_to_page(as_phys_addr_t(page_array[page_index]));
+	p = as_page(page_array[page_index]);
 
 	/* we need the first 10 words of the fragment shader job descriptor.
 	 * We need to check that the offset + 10 words is less that the page
@@ -103,7 +98,7 @@
 	/* The data needed overflows page the dimension,
 	 * need to map the subsequent page */
 	if (copy_size < JOB_HEADER_SIZE) {
-		p = phys_to_page(as_phys_addr_t(page_array[page_index + 1]));
+		p = as_page(page_array[page_index + 1]);
 		page_2 = kmap_atomic(p);
 
 		kbase_sync_single_for_cpu(katom->kctx->kbdev,
@@ -186,7 +181,7 @@
 
 		/* Flush CPU cache to update memory for future GPU reads*/
 		memcpy(page_1, dst, copy_size);
-		p = phys_to_page(as_phys_addr_t(page_array[page_index]));
+		p = as_page(page_array[page_index]);
 
 		kbase_sync_single_for_device(katom->kctx->kbdev,
 				kbase_dma_addr(p) + offset,
@@ -195,8 +190,7 @@
 		if (copy_size < JOB_HEADER_SIZE) {
 			memcpy(page_2, dst + copy_size,
 					JOB_HEADER_SIZE - copy_size);
-			p = phys_to_page(as_phys_addr_t(page_array[page_index +
-								   1]));
+			p = as_page(page_array[page_index + 1]);
 
 			kbase_sync_single_for_device(katom->kctx->kbdev,
 					kbase_dma_addr(p),
diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
index 624dc4a..379a05a 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2014, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,6 +23,15 @@
 #ifndef _KBASE_10969_WORKAROUND_
 #define _KBASE_10969_WORKAROUND_
 
+/**
+ * kbasep_10969_workaround_clamp_coordinates - Apply the workaround to clamp the restart indices
+ * @katom: atom representing the fragment job for which the workaround has to be applied
+ *
+ * This workaround is used to solve an HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, it can
+ * happen that the restart index is out of bounds and the rerun causes a tile
+ * range fault. If this happens we try to clamp the restart index to a correct
+ * value and rerun the job.
+ */
 int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
 
 #endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
index 1dee5cb..2e99a4d 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -41,13 +41,14 @@
 	list_for_each(entry, kbdev_list) {
 		kbdev = list_entry(entry, struct kbase_device, entry);
 
-		if(kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) {
+		if (kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) {
 
 			/* don't show this one again until another fault occors */
 			kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no);
 
 			/* output the last page fault addr */
-			seq_printf(sfile, "%llu\n", (u64) kbdev->as[as_no].fault_addr);
+			seq_printf(sfile, "%llu\n",
+				   (u64) kbdev->as[as_no].fault_addr);
 		}
 
 	}
@@ -59,7 +60,7 @@
 
 static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file)
 {
-	return single_open(file, kbase_as_fault_read , in->i_private);
+	return single_open(file, kbase_as_fault_read, in->i_private);
 }
 
 static const struct file_operations as_fault_fops = {
@@ -89,17 +90,20 @@
 	KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64));
 
 	debugfs_directory = debugfs_create_dir("address_spaces",
-		kbdev->mali_debugfs_directory);
+					       kbdev->mali_debugfs_directory);
 
-	if(debugfs_directory) {
-		for(i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+	if (debugfs_directory) {
+		for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
 			snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
 			debugfs_create_file(as_name, S_IRUGO,
-				debugfs_directory, (void*) ((uintptr_t) i), &as_fault_fops);
+					    debugfs_directory,
+					    (void *)(uintptr_t)i,
+					    &as_fault_fops);
 		}
+	} else {
+		dev_warn(kbdev->dev,
+			 "unable to create address_spaces debugfs directory");
 	}
-	else
-		dev_warn(kbdev->dev, "unable to create address_spaces debugfs directory");
 
 #endif /* CONFIG_MALI_DEBUG */
 #endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
index 18444b8..27a03cf 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,7 +30,12 @@
 
 /*
  * The output flags should be a combination of the following values:
- * KBASE_REG_CPU_CACHED: CPU cache should be enabled.
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled
+ * KBASE_REG_GPU_CACHED: GPU cache should be enabled
+ *
+ * NOTE: Some components within the GPU might only be able to access memory
+ * that is KBASE_REG_GPU_CACHED. Refer to the specific GPU implementation for
+ * more details.
  */
 u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
 {
@@ -38,6 +43,9 @@
 
 	CSTD_UNUSED(nr_pages);
 
+	if (!(flags & BASE_MEM_UNCACHED_GPU))
+		cache_flags |= KBASE_REG_GPU_CACHED;
+
 	if (flags & BASE_MEM_CACHED_CPU)
 		cache_flags |= KBASE_REG_CPU_CACHED;
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
index dbb4f97..376a94b 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -151,17 +151,11 @@
 #define DEFAULT_3BIT_AWID_LIMIT KBASE_3BIT_AID_32
 
 /**
- * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
- * defines which UMP device this GPU should be mapped to.
- */
-#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
-
-/*
  * Default period for DVFS sampling
  */
 #define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
 
-/*
+/**
  * Power Management poweroff tick granuality. This is in nanoseconds to
  * allow HR timer support.
  *
@@ -171,22 +165,22 @@
  */
 #define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
 
-/*
+/**
  * Power Manager number of ticks before shader cores are powered off
  */
 #define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
 
-/*
+/**
  * Power Manager number of ticks before GPU is powered off
  */
 #define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
 
-/*
+/**
  * Default scheduling tick granuality
  */
 #define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
 
-/*
+/**
  * Default minimum number of scheduling ticks before jobs are soft-stopped.
  *
  * This defines the time-slice for a job (which may be different from that of a
@@ -194,60 +188,60 @@
  */
 #define DEFAULT_JS_SOFT_STOP_TICKS       (1) /* 100ms-200ms */
 
-/*
+/**
  * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
  */
 #define DEFAULT_JS_SOFT_STOP_TICKS_CL    (1) /* 100ms-200ms */
 
-/*
+/**
  * Default minimum number of scheduling ticks before jobs are hard-stopped
  */
 #define DEFAULT_JS_HARD_STOP_TICKS_SS    (50) /* 5s */
 #define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
 
-/*
+/**
  * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
  */
 #define DEFAULT_JS_HARD_STOP_TICKS_CL    (50) /* 5s */
 
-/*
+/**
  * Default minimum number of scheduling ticks before jobs are hard-stopped
  * during dumping
  */
 #define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
 
-/*
+/**
  * Default timeout for some software jobs, after which the software event wait
  * jobs will be cancelled.
  */
 #define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */
 
-/*
+/**
  * Default minimum number of scheduling ticks before the GPU is reset to clear a
  * "stuck" job
  */
 #define DEFAULT_JS_RESET_TICKS_SS           (55) /* 5.5s */
 #define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
 
-/*
+/**
  * Default minimum number of scheduling ticks before the GPU is reset to clear a
  * "stuck" CL job.
  */
 #define DEFAULT_JS_RESET_TICKS_CL        (55) /* 5.5s */
 
-/*
+/**
  * Default minimum number of scheduling ticks before the GPU is reset to clear a
  * "stuck" job during dumping.
  */
 #define DEFAULT_JS_RESET_TICKS_DUMPING   (15020) /* 1502s */
 
-/*
+/**
  * Default number of milliseconds given for other jobs on the GPU to be
  * soft-stopped when the GPU needs to be reset.
  */
 #define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
 
-/*
+/**
  * Default timeslice that a context is scheduled in for, in nanoseconds.
  *
  * When a context has used up this amount of time across its jobs, it is
@@ -258,7 +252,7 @@
  */
 #define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
 
-/*
+/**
  * Perform GPU power down using only platform specific code, skipping DDK power
  * management.
  *
@@ -270,9 +264,9 @@
  * Note that as this prevents kbase from powering down shader cores, this limits
  * the available power policies to coarse_demand and always_on.
  */
-#define PLATFORM_POWER_DOWN_ONLY (0)
+#define PLATFORM_POWER_DOWN_ONLY (1)
 
-/*
+/**
  * Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
  * this isn't available, so we simply define a dummy value here. If devfreq
  * is enabled the value will be read from there, otherwise this should be
diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c
index 868442a..970be89 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_context.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -32,15 +32,6 @@
 #include <mali_kbase_dma_fence.h>
 #include <mali_kbase_ctx_sched.h>
 
-/**
- * kbase_create_context() - Create a kernel base context.
- * @kbdev: Kbase device
- * @is_compat: Force creation of a 32-bit context
- *
- * Allocate and init a kernel base context.
- *
- * Return: new kbase context
- */
 struct kbase_context *
 kbase_create_context(struct kbase_device *kbdev, bool is_compat)
 {
@@ -69,9 +60,6 @@
 		kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA);
 #endif /* !defined(CONFIG_64BIT) */
 
-#ifdef CONFIG_MALI_TRACE_TIMELINE
-	kctx->timeline.owner_tgid = task_tgid_nr(current);
-#endif
 	atomic_set(&kctx->setup_complete, 0);
 	atomic_set(&kctx->setup_in_progress, 0);
 	spin_lock_init(&kctx->mm_update_lock);
@@ -115,11 +103,12 @@
 	if (err)
 		goto free_jd;
 
+
 	atomic_set(&kctx->drain_pending, 0);
 
 	mutex_init(&kctx->reg_lock);
 
-	mutex_init(&kctx->mem_partials_lock);
+	spin_lock_init(&kctx->mem_partials_lock);
 	INIT_LIST_HEAD(&kctx->mem_partials);
 
 	INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
@@ -128,21 +117,10 @@
 	if (err)
 		goto free_event;
 
-	err = kbase_mmu_init(kctx);
+	err = kbase_mmu_init(kbdev, &kctx->mmu, kctx);
 	if (err)
 		goto term_dma_fence;
 
-	do {
-		err = kbase_mem_pool_grow(&kctx->mem_pool,
-				MIDGARD_MMU_BOTTOMLEVEL);
-		if (err)
-			goto pgd_no_mem;
-
-		mutex_lock(&kctx->mmu_lock);
-		kctx->pgd = kbase_mmu_alloc_pgd(kctx);
-		mutex_unlock(&kctx->mmu_lock);
-	} while (!kctx->pgd);
-
 	p = kbase_mem_alloc_page(&kctx->mem_pool);
 	if (!p)
 		goto no_sink_page;
@@ -152,6 +130,7 @@
 
 	kctx->cookies = KBASE_COOKIE_MASK;
 
+
 	/* Make sure page 0 is not used... */
 	err = kbase_region_tracker_init(kctx);
 	if (err)
@@ -167,17 +146,13 @@
 #ifdef CONFIG_GPU_TRACEPOINTS
 	atomic_set(&kctx->jctx.work_id, 0);
 #endif
-#ifdef CONFIG_MALI_TRACE_TIMELINE
-	atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
-#endif
 
 	kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
 
 	mutex_init(&kctx->vinstr_cli_lock);
 
-	setup_timer(&kctx->soft_job_timeout,
-		    kbasep_soft_job_timeout_worker,
-		    (uintptr_t)kctx);
+	kbase_timer_setup(&kctx->soft_job_timeout,
+			  kbasep_soft_job_timeout_worker);
 
 	return kctx;
 
@@ -190,12 +165,7 @@
 no_region_tracker:
 	kbase_mem_pool_free(&kctx->mem_pool, p, false);
 no_sink_page:
-	/* VM lock needed for the call to kbase_mmu_free_pgd */
-	kbase_gpu_vm_lock(kctx);
-	kbase_mmu_free_pgd(kctx);
-	kbase_gpu_vm_unlock(kctx);
-pgd_no_mem:
-	kbase_mmu_term(kctx);
+	kbase_mmu_term(kbdev, &kctx->mmu);
 term_dma_fence:
 	kbase_dma_fence_term(kctx);
 free_event:
@@ -217,21 +187,15 @@
 }
 KBASE_EXPORT_SYMBOL(kbase_create_context);
 
-static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
+static void kbase_reg_pending_dtor(struct kbase_device *kbdev,
+		struct kbase_va_region *reg)
 {
-	dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
+	dev_dbg(kbdev->dev, "Freeing pending unmapped region\n");
 	kbase_mem_phy_alloc_put(reg->cpu_alloc);
 	kbase_mem_phy_alloc_put(reg->gpu_alloc);
 	kfree(reg);
 }
 
-/**
- * kbase_destroy_context - Destroy a kernel base context.
- * @kctx: Context to destroy
- *
- * Calls kbase_destroy_os_context() to free OS specific structures.
- * Will release all outstanding regions.
- */
 void kbase_destroy_context(struct kbase_context *kctx)
 {
 	struct kbase_device *kbdev;
@@ -252,6 +216,8 @@
 	 * thread. */
 	kbase_pm_context_active(kbdev);
 
+	kbase_mem_pool_mark_dying(&kctx->mem_pool);
+
 	kbase_jd_zap_context(kctx);
 
 #ifdef CONFIG_DEBUG_FS
@@ -263,6 +229,7 @@
 
 	kbase_event_cleanup(kctx);
 
+
 	/*
 	 * JIT must be terminated before the code below as it must be called
 	 * without the region lock being held.
@@ -275,11 +242,8 @@
 
 	kbase_sticky_resource_term(kctx);
 
-	/* MMU is disabled as part of scheduling out the context */
-	kbase_mmu_free_pgd(kctx);
-
 	/* drop the aliasing sink page now that it can't be mapped anymore */
-	p = phys_to_page(as_phys_addr_t(kctx->aliasing_sink_page));
+	p = as_page(kctx->aliasing_sink_page);
 	kbase_mem_pool_free(&kctx->mem_pool, p, false);
 
 	/* free pending region setups */
@@ -289,7 +253,7 @@
 
 		BUG_ON(!kctx->pending_regions[cookie]);
 
-		kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
+		kbase_reg_pending_dtor(kbdev, kctx->pending_regions[cookie]);
 
 		kctx->pending_regions[cookie] = NULL;
 		pending_regions_to_clean &= ~(1UL << cookie);
@@ -298,6 +262,7 @@
 	kbase_region_tracker_term(kctx);
 	kbase_gpu_vm_unlock(kctx);
 
+
 	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
 	kbasep_js_kctx_term(kctx);
 
@@ -311,7 +276,7 @@
 	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
 	mutex_unlock(&kbdev->mmu_hw_mutex);
 
-	kbase_mmu_term(kctx);
+	kbase_mmu_term(kbdev, &kctx->mmu);
 
 	pages = atomic_read(&kctx->used_pages);
 	if (pages != 0)
@@ -328,13 +293,6 @@
 }
 KBASE_EXPORT_SYMBOL(kbase_destroy_context);
 
-/**
- * kbase_context_set_create_flags - Set creation flags on a context
- * @kctx: Kbase context
- * @flags: Flags to set
- *
- * Return: 0 on success
- */
 int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
 {
 	int err = 0;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.h b/drivers/gpu/arm/midgard/mali_kbase_context.h
index 431f9e5..30b0f64 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_context.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_context.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -25,7 +25,35 @@
 
 #include <linux/atomic.h>
 
+/**
+ * kbase_create_context() - Create a kernel base context.
+ * @kbdev: Kbase device
+ * @is_compat: Force creation of a 32-bit context
+ *
+ * Allocate and init a kernel base context.
+ *
+ * Return: new kbase context
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
 
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Calls kbase_destroy_os_context() to free OS specific structures.
+ * Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx);
+
+/**
+ * kbase_context_set_create_flags - Set creation flags on a context
+ * @kctx: Kbase context
+ * @flags: Flags to set, which shall be one of the flags of
+ *         BASE_CONTEXT_CREATE_KERNEL_FLAGS.
+ *
+ * Return: 0 on success, -EINVAL otherwise when an invalid flag is specified.
+ */
 int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
 
 /**
diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
index 22c995a..d44ebd9 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -34,6 +34,7 @@
 #endif /* CONFIG_MALI_DEVFREQ */
 #ifdef CONFIG_MALI_NO_MALI
 #include "mali_kbase_model_linux.h"
+#include <backend/gpu/mali_kbase_model_dummy.h>
 #endif /* CONFIG_MALI_NO_MALI */
 #include "mali_kbase_mem_profile_debugfs_buf_size.h"
 #include "mali_kbase_debug_mem_view.h"
@@ -68,7 +69,7 @@
 #include <linux/uaccess.h>
 #include <linux/interrupt.h>
 #include <linux/mm.h>
-#include <linux/compat.h>	/* is_compat_task */
+#include <linux/compat.h>	/* is_compat_task/in_compat_syscall */
 #include <linux/mman.h>
 #include <linux/version.h>
 #include <mali_kbase_hw.h>
@@ -82,7 +83,7 @@
 #include <mali_kbase_config.h>
 
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#if (KERNEL_VERSION(3, 13, 0) <= LINUX_VERSION_CODE)
 #include <linux/pm_opp.h>
 #else
 #include <linux/opp.h>
@@ -105,19 +106,20 @@
 #define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
 
 static int kbase_api_handshake(struct kbase_context *kctx,
-		struct kbase_ioctl_version_check *version)
+			       struct kbase_ioctl_version_check *version)
 {
 	switch (version->major) {
 	case BASE_UK_VERSION_MAJOR:
 		/* set minor to be the lowest common */
 		version->minor = min_t(int, BASE_UK_VERSION_MINOR,
-				(int)version->minor);
+				       (int)version->minor);
 		break;
 	default:
 		/* We return our actual version regardless if it
 		 * matches the version returned by userspace -
 		 * userspace can bail if it can't handle this
-		 * version */
+		 * version
+		 */
 		version->major = BASE_UK_VERSION_MAJOR;
 		version->minor = BASE_UK_VERSION_MINOR;
 		break;
@@ -162,7 +164,6 @@
 	inited_backend_late = (1u << 6),
 	inited_device = (1u << 7),
 	inited_vinstr = (1u << 8),
-
 	inited_job_fault = (1u << 10),
 	inited_sysfs_group = (1u << 11),
 	inited_misc_register = (1u << 12),
@@ -280,9 +281,9 @@
  */
 static int kstrtobool_from_user(const char __user *s, size_t count, bool *res)
 {
-	char buf[32];
+	char buf[4];
 
-	count = min(sizeof(buf), count);
+	count = min(count, sizeof(buf) - 1);
 
 	if (copy_from_user(buf, s, count))
 		return -EFAULT;
@@ -393,7 +394,11 @@
 	if (!kbdev)
 		return -ENODEV;
 
+#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE)
+	kctx = kbase_create_context(kbdev, in_compat_syscall());
+#else
 	kctx = kbase_create_context(kbdev, is_compat_task());
+#endif /* (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) */
 	if (!kctx) {
 		ret = -ENOMEM;
 		goto out;
@@ -493,11 +498,11 @@
 	/* If this client was performing hwcnt dumping and did not explicitly
 	 * detach itself, remove it from the vinstr core now */
 	if (kctx->vinstr_cli) {
-		struct kbase_uk_hwcnt_setup setup;
+		struct kbase_ioctl_hwcnt_enable enable;
 
-		setup.dump_buffer = 0llu;
+		enable.dump_buffer = 0llu;
 		kbase_vinstr_legacy_hwc_setup(
-				kbdev->vinstr_ctx, &kctx->vinstr_cli, &setup);
+				kbdev->vinstr_ctx, &kctx->vinstr_cli, &enable);
 	}
 	mutex_unlock(&kctx->vinstr_cli_lock);
 
@@ -573,12 +578,27 @@
 	u64 flags = alloc->in.flags;
 	u64 gpu_va;
 
+	rcu_read_lock();
+	/* Don't allow memory allocation until user space has set up the
+	 * tracking page (which sets kctx->process_mm). Also catches when we've
+	 * forked.
+	 */
+	if (rcu_dereference(kctx->process_mm) != current->mm) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+	rcu_read_unlock();
+
+	if (flags & BASE_MEM_FLAGS_KERNEL_ONLY)
+		return -ENOMEM;
+
 	if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) &&
 			kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) {
 		/* force SAME_VA if a 64-bit client */
 		flags |= BASE_MEM_SAME_VA;
 	}
 
+
 	reg = kbase_mem_alloc(kctx, alloc->in.va_pages,
 			alloc->in.commit_pages,
 			alloc->in.extent,
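
The new guard above is the usual RCU read-side pattern: take the read lock, dereference the published pointer, compare, drop the lock. A generic, self-contained sketch of both sides of that pattern (the writer shown here is illustrative; the code that actually publishes kctx->process_mm lives elsewhere in the driver):

#include <linux/rcupdate.h>
#include <linux/types.h>

struct mm_struct;

struct example_ctx {
	struct mm_struct __rcu *process_mm;	/* published under RCU */
};

/* Reader: may run concurrently with the writer below. */
static bool example_mm_matches(struct example_ctx *ctx, struct mm_struct *mm)
{
	bool match;

	rcu_read_lock();
	match = rcu_dereference(ctx->process_mm) == mm;
	rcu_read_unlock();

	return match;
}

/* Writer: readers observe either the old or the new pointer, never a torn
 * value. Writers themselves are assumed to be serialised elsewhere.
 */
static void example_mm_publish(struct example_ctx *ctx, struct mm_struct *mm)
{
	rcu_assign_pointer(ctx->process_mm, mm);
}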
@@ -610,38 +630,22 @@
 		struct kbase_ioctl_hwcnt_reader_setup *setup)
 {
 	int ret;
-	struct kbase_uk_hwcnt_reader_setup args = {
-		.buffer_count = setup->buffer_count,
-		.jm_bm = setup->jm_bm,
-		.shader_bm = setup->shader_bm,
-		.tiler_bm = setup->tiler_bm,
-		.mmu_l2_bm = setup->mmu_l2_bm
-	};
 
 	mutex_lock(&kctx->vinstr_cli_lock);
-	ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, &args);
+	ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup);
 	mutex_unlock(&kctx->vinstr_cli_lock);
 
-	if (ret)
-		return ret;
-	return args.fd;
+	return ret;
 }
 
 static int kbase_api_hwcnt_enable(struct kbase_context *kctx,
 		struct kbase_ioctl_hwcnt_enable *enable)
 {
 	int ret;
-	struct kbase_uk_hwcnt_setup args = {
-		.dump_buffer = enable->dump_buffer,
-		.jm_bm = enable->jm_bm,
-		.shader_bm = enable->shader_bm,
-		.tiler_bm = enable->tiler_bm,
-		.mmu_l2_bm = enable->mmu_l2_bm
-	};
 
 	mutex_lock(&kctx->vinstr_cli_lock);
 	ret = kbase_vinstr_legacy_hwc_setup(kctx->kbdev->vinstr_ctx,
-			&kctx->vinstr_cli, &args);
+			&kctx->vinstr_cli, enable);
 	mutex_unlock(&kctx->vinstr_cli_lock);
 
 	return ret;
@@ -670,6 +674,18 @@
 	return ret;
 }
 
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_api_hwcnt_set(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_values *values)
+{
+	gpu_model_set_dummy_prfcnt_sample(
+			(u32 __user *)(uintptr_t)values->data,
+			values->size);
+
+	return 0;
+}
+#endif
+
 static int kbase_api_disjoint_query(struct kbase_context *kctx,
 		struct kbase_ioctl_disjoint_query *query)
 {
@@ -700,10 +716,37 @@
 	return len;
 }
 
+/* Defaults for legacy JIT init ioctl */
+#define DEFAULT_MAX_JIT_ALLOCATIONS 255
+#define JIT_LEGACY_TRIM_LEVEL (0) /* No trimming */
+
+static int kbase_api_mem_jit_init_old(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_jit_init_old *jit_init)
+{
+	kctx->jit_version = 1;
+
+	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
+			DEFAULT_MAX_JIT_ALLOCATIONS,
+			JIT_LEGACY_TRIM_LEVEL);
+}
+
 static int kbase_api_mem_jit_init(struct kbase_context *kctx,
 		struct kbase_ioctl_mem_jit_init *jit_init)
 {
-	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages);
+	int i;
+
+	kctx->jit_version = 2;
+
+	for (i = 0; i < sizeof(jit_init->padding); i++) {
+		/* Ensure all padding bytes are 0 for potential future
+		 * extension
+		 */
+		if (jit_init->padding[i])
+			return -EINVAL;
+	}
+
+	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
+			jit_init->max_allocations, jit_init->trim_level);
 }
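
The padding loop above rejects any non-zero reserved byte so the struct can be extended later without ambiguity. The kernel's memchr_inv() helper expresses the same check more compactly; a sketch of that alternative (equivalent in effect, not what the patch uses):

#include <linux/string.h>
#include <linux/errno.h>
#include <linux/types.h>

/* Return 0 if every byte in @padding is zero, -EINVAL otherwise. */
static int check_padding_zeroed(const u8 *padding, size_t len)
{
	if (memchr_inv(padding, 0, len))
		return -EINVAL;		/* reserved bytes must stay zero */
	return 0;
}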
 
 static int kbase_api_mem_sync(struct kbase_context *kctx,
@@ -793,6 +836,10 @@
 	}
 
 	flags = alias->in.flags;
+	if (flags & BASE_MEM_FLAGS_KERNEL_ONLY) {
+		vfree(ai);
+		return -EINVAL;
+	}
 
 	alias->out.gpu_va = kbase_mem_alias(kctx, &flags,
 			alias->in.stride, alias->in.nents,
@@ -814,6 +861,9 @@
 	int ret;
 	u64 flags = import->in.flags;
 
+	if (flags & BASE_MEM_FLAGS_KERNEL_ONLY)
+		return -ENOMEM;
+
 	ret = kbase_mem_import(kctx,
 			import->in.type,
 			u64_to_user_ptr(import->in.phandle),
@@ -830,6 +880,9 @@
 static int kbase_api_mem_flags_change(struct kbase_context *kctx,
 		struct kbase_ioctl_mem_flags_change *change)
 {
+	if (change->flags & BASE_MEM_FLAGS_KERNEL_ONLY)
+		return -ENOMEM;
+
 	return kbase_mem_flags_change(kctx, change->gpu_va,
 			change->flags, change->mask);
 }
@@ -1010,15 +1063,14 @@
 }
 #endif /* MALI_UNIT_TEST */
 
+
 #define KBASE_HANDLE_IOCTL(cmd, function)                          \
-	case cmd:                                                  \
 	do {                                                       \
 		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE);          \
 		return function(kctx);                             \
 	} while (0)
 
 #define KBASE_HANDLE_IOCTL_IN(cmd, function, type)                 \
-	case cmd:                                                  \
 	do {                                                       \
 		type param;                                        \
 		int err;                                           \
@@ -1031,7 +1083,6 @@
 	} while (0)
 
 #define KBASE_HANDLE_IOCTL_OUT(cmd, function, type)                \
-	case cmd:                                                  \
 	do {                                                       \
 		type param;                                        \
 		int ret, err;                                      \
@@ -1045,7 +1096,6 @@
 	} while (0)
 
 #define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type)                  \
-	case cmd:                                                      \
 	do {                                                           \
 		type param;                                            \
 		int ret, err;                                          \
@@ -1069,12 +1119,17 @@
 
 	/* Only these ioctls are available until setup is complete */
 	switch (cmd) {
+	case KBASE_IOCTL_VERSION_CHECK:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK,
 				kbase_api_handshake,
 				struct kbase_ioctl_version_check);
+		break;
+
+	case KBASE_IOCTL_SET_FLAGS:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS,
 				kbase_api_set_flags,
 				struct kbase_ioctl_set_flags);
+		break;
 	}
 
 	/* Block call until version handshake and setup is complete */
@@ -1083,109 +1138,192 @@
 
 	/* Normal ioctls */
 	switch (cmd) {
+	case KBASE_IOCTL_JOB_SUBMIT:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT,
 				kbase_api_job_submit,
 				struct kbase_ioctl_job_submit);
+		break;
+	case KBASE_IOCTL_GET_GPUPROPS:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS,
 				kbase_api_get_gpuprops,
 				struct kbase_ioctl_get_gpuprops);
+		break;
+	case KBASE_IOCTL_POST_TERM:
 		KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM,
 				kbase_api_post_term);
+		break;
+	case KBASE_IOCTL_MEM_ALLOC:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC,
 				kbase_api_mem_alloc,
 				union kbase_ioctl_mem_alloc);
+		break;
+	case KBASE_IOCTL_MEM_QUERY:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY,
 				kbase_api_mem_query,
 				union kbase_ioctl_mem_query);
+		break;
+	case KBASE_IOCTL_MEM_FREE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE,
 				kbase_api_mem_free,
 				struct kbase_ioctl_mem_free);
-		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP,
-				kbase_api_hwcnt_reader_setup,
-				struct kbase_ioctl_hwcnt_reader_setup);
-		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE,
-				kbase_api_hwcnt_enable,
-				struct kbase_ioctl_hwcnt_enable);
-		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP,
-				kbase_api_hwcnt_dump);
-		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR,
-				kbase_api_hwcnt_clear);
+		break;
+	case KBASE_IOCTL_DISJOINT_QUERY:
 		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY,
 				kbase_api_disjoint_query,
 				struct kbase_ioctl_disjoint_query);
+		break;
+	case KBASE_IOCTL_GET_DDK_VERSION:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION,
 				kbase_api_get_ddk_version,
 				struct kbase_ioctl_get_ddk_version);
+		break;
+	case KBASE_IOCTL_MEM_JIT_INIT_OLD:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_OLD,
+				kbase_api_mem_jit_init_old,
+				struct kbase_ioctl_mem_jit_init_old);
+		break;
+	case KBASE_IOCTL_MEM_JIT_INIT:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT,
 				kbase_api_mem_jit_init,
 				struct kbase_ioctl_mem_jit_init);
+		break;
+	case KBASE_IOCTL_MEM_SYNC:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC,
 				kbase_api_mem_sync,
 				struct kbase_ioctl_mem_sync);
+		break;
+	case KBASE_IOCTL_MEM_FIND_CPU_OFFSET:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET,
 				kbase_api_mem_find_cpu_offset,
 				union kbase_ioctl_mem_find_cpu_offset);
+		break;
+	case KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET,
 				kbase_api_mem_find_gpu_start_and_offset,
 				union kbase_ioctl_mem_find_gpu_start_and_offset);
+		break;
+	case KBASE_IOCTL_GET_CONTEXT_ID:
 		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID,
 				kbase_api_get_context_id,
 				struct kbase_ioctl_get_context_id);
+		break;
+	case KBASE_IOCTL_TLSTREAM_ACQUIRE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE,
 				kbase_api_tlstream_acquire,
 				struct kbase_ioctl_tlstream_acquire);
+		break;
+	case KBASE_IOCTL_TLSTREAM_FLUSH:
 		KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH,
 				kbase_api_tlstream_flush);
+		break;
+	case KBASE_IOCTL_MEM_COMMIT:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT,
 				kbase_api_mem_commit,
 				struct kbase_ioctl_mem_commit);
+		break;
+	case KBASE_IOCTL_MEM_ALIAS:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS,
 				kbase_api_mem_alias,
 				union kbase_ioctl_mem_alias);
+		break;
+	case KBASE_IOCTL_MEM_IMPORT:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT,
 				kbase_api_mem_import,
 				union kbase_ioctl_mem_import);
+		break;
+	case KBASE_IOCTL_MEM_FLAGS_CHANGE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE,
 				kbase_api_mem_flags_change,
 				struct kbase_ioctl_mem_flags_change);
+		break;
+	case KBASE_IOCTL_STREAM_CREATE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE,
 				kbase_api_stream_create,
 				struct kbase_ioctl_stream_create);
+		break;
+	case KBASE_IOCTL_FENCE_VALIDATE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE,
 				kbase_api_fence_validate,
 				struct kbase_ioctl_fence_validate);
+		break;
+	case KBASE_IOCTL_GET_PROFILING_CONTROLS:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_PROFILING_CONTROLS,
 				kbase_api_get_profiling_controls,
 				struct kbase_ioctl_get_profiling_controls);
+		break;
+	case KBASE_IOCTL_MEM_PROFILE_ADD:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD,
 				kbase_api_mem_profile_add,
 				struct kbase_ioctl_mem_profile_add);
+		break;
+	case KBASE_IOCTL_SOFT_EVENT_UPDATE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE,
 				kbase_api_soft_event_update,
 				struct kbase_ioctl_soft_event_update);
-#ifdef CONFIG_MALI_JOB_DUMP
-		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START,
-				kbase_gpu_gwt_start);
-		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP,
-				kbase_gpu_gwt_stop);
-		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP,
-				kbase_gpu_gwt_dump,
-				union kbase_ioctl_cinstr_gwt_dump);
-#endif
+		break;
+	case KBASE_IOCTL_STICKY_RESOURCE_MAP:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP,
 				kbase_api_sticky_resource_map,
 				struct kbase_ioctl_sticky_resource_map);
+		break;
+	case KBASE_IOCTL_STICKY_RESOURCE_UNMAP:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_UNMAP,
 				kbase_api_sticky_resource_unmap,
 				struct kbase_ioctl_sticky_resource_unmap);
+		break;
 
+	/* Instrumentation. */
+	case KBASE_IOCTL_HWCNT_READER_SETUP:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP,
+				kbase_api_hwcnt_reader_setup,
+				struct kbase_ioctl_hwcnt_reader_setup);
+		break;
+	case KBASE_IOCTL_HWCNT_ENABLE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE,
+				kbase_api_hwcnt_enable,
+				struct kbase_ioctl_hwcnt_enable);
+		break;
+	case KBASE_IOCTL_HWCNT_DUMP:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP,
+				kbase_api_hwcnt_dump);
+		break;
+	case KBASE_IOCTL_HWCNT_CLEAR:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR,
+				kbase_api_hwcnt_clear);
+		break;
+#ifdef CONFIG_MALI_NO_MALI
+	case KBASE_IOCTL_HWCNT_SET:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET,
+				kbase_api_hwcnt_set,
+				struct kbase_ioctl_hwcnt_values);
+		break;
+#endif
+#ifdef CONFIG_MALI_JOB_DUMP
+	case KBASE_IOCTL_CINSTR_GWT_START:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START,
+				kbase_gpu_gwt_start);
+		break;
+	case KBASE_IOCTL_CINSTR_GWT_STOP:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP,
+				kbase_gpu_gwt_stop);
+		break;
+	case KBASE_IOCTL_CINSTR_GWT_DUMP:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP,
+				kbase_gpu_gwt_dump,
+				union kbase_ioctl_cinstr_gwt_dump);
+		break;
+#endif
 #if MALI_UNIT_TEST
+	case KBASE_IOCTL_TLSTREAM_TEST:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST,
 				kbase_api_tlstream_test,
 				struct kbase_ioctl_tlstream_test);
+		break;
+	case KBASE_IOCTL_TLSTREAM_STATS:
 		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS,
 				kbase_api_tlstream_stats,
 				struct kbase_ioctl_tlstream_stats);
+		break;
 #endif
 	}
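
With the case labels pulled out of the KBASE_HANDLE_IOCTL* macros, each entry in the switch above now spells its own label and a trailing break. After preprocessing, the KBASE_IOCTL_POST_TERM entry reads roughly as follows; the break is unreachable because the macro returns, but it keeps every case explicitly terminated:

	case KBASE_IOCTL_POST_TERM:
		do {
			BUILD_BUG_ON(_IOC_DIR(KBASE_IOCTL_POST_TERM) != _IOC_NONE);
			return kbase_api_post_term(kctx);
		} while (0);
		break;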
 
@@ -1382,111 +1520,6 @@
  */
 static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
 
-/**
- * show_ca_policy - Show callback for the core_availability_policy sysfs file.
- *
- * This function is called to get the contents of the core_availability_policy
- * sysfs file. This is a list of the available policies with the currently
- * active one surrounded by square brackets.
- *
- * @dev:	The device this sysfs file is for
- * @attr:	The attributes of the sysfs file
- * @buf:	The output buffer for the sysfs file contents
- *
- * Return: The number of bytes output to @buf.
- */
-static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf)
-{
-	struct kbase_device *kbdev;
-	const struct kbase_pm_ca_policy *current_policy;
-	const struct kbase_pm_ca_policy *const *policy_list;
-	int policy_count;
-	int i;
-	ssize_t ret = 0;
-
-	kbdev = to_kbase_device(dev);
-
-	if (!kbdev)
-		return -ENODEV;
-
-	current_policy = kbase_pm_ca_get_policy(kbdev);
-
-	policy_count = kbase_pm_ca_list_policies(&policy_list);
-
-	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
-		if (policy_list[i] == current_policy)
-			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
-		else
-			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
-	}
-
-	if (ret < PAGE_SIZE - 1) {
-		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
-	} else {
-		buf[PAGE_SIZE - 2] = '\n';
-		buf[PAGE_SIZE - 1] = '\0';
-		ret = PAGE_SIZE - 1;
-	}
-
-	return ret;
-}
-
-/**
- * set_ca_policy - Store callback for the core_availability_policy sysfs file.
- *
- * This function is called when the core_availability_policy sysfs file is
- * written to. It matches the requested policy against the available policies
- * and if a matching policy is found calls kbase_pm_set_policy() to change
- * the policy.
- *
- * @dev:	The device with sysfs file is for
- * @attr:	The attributes of the sysfs file
- * @buf:	The value written to the sysfs file
- * @count:	The number of bytes written to the sysfs file
- *
- * Return: @count if the function succeeded. An error code on failure.
- */
-static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
-{
-	struct kbase_device *kbdev;
-	const struct kbase_pm_ca_policy *new_policy = NULL;
-	const struct kbase_pm_ca_policy *const *policy_list;
-	int policy_count;
-	int i;
-
-	kbdev = to_kbase_device(dev);
-
-	if (!kbdev)
-		return -ENODEV;
-
-	policy_count = kbase_pm_ca_list_policies(&policy_list);
-
-	for (i = 0; i < policy_count; i++) {
-		if (sysfs_streq(policy_list[i]->name, buf)) {
-			new_policy = policy_list[i];
-			break;
-		}
-	}
-
-	if (!new_policy) {
-		dev_err(dev, "core_availability_policy: policy not found\n");
-		return -EINVAL;
-	}
-
-	kbase_pm_ca_set_policy(kbdev, new_policy);
-
-	return count;
-}
-
-/*
- * The sysfs file core_availability_policy
- *
- * This is used for obtaining information about the available policies,
- * determining which policy is currently active, and changing the active
- * policy.
- */
-static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy);
-
 /*
  * show_core_mask - Show callback for the core_mask sysfs file.
  *
@@ -2297,7 +2330,11 @@
 		{ .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
 		  .name = "Mali-G51" },
 		{ .id = GPU_ID2_PRODUCT_TNOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
-		  .name = "Mali-TNOx" },
+		  .name = "Mali-G76" },
+		{ .id = GPU_ID2_PRODUCT_TDVX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G31" },
+		{ .id = GPU_ID2_PRODUCT_TGOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G52" },
 	};
 	const char *product_name = "(Unknown Mali GPU)";
 	struct kbase_device *kbdev;
@@ -2737,6 +2774,88 @@
 static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size,
 		set_lp_mem_pool_max_size);
 
+/**
+ * show_js_ctx_scheduling_mode - Show callback for js_ctx_scheduling_mode sysfs
+ *                               entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the context scheduling mode information.
+ *
+ * This function is called to get the context scheduling mode being used by JS.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_ctx_scheduling_mode(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->js_ctx_scheduling_mode);
+}
+
+/**
+ * set_js_ctx_scheduling_mode - Set callback for js_ctx_scheduling_mode sysfs
+ *                              entry.
+ * @dev:   The device this sysfs file is for.
+ * @attr:  The attributes of the sysfs file.
+ * @buf:   The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called when the js_ctx_scheduling_mode sysfs file is written
+ * to. It checks the data written, and if valid updates the ctx scheduling mode
+ * being used by JS.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_ctx_scheduling_mode(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbasep_kctx_list_element *element;
+	u32 new_js_ctx_scheduling_mode;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	int ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtouint(buf, 0, &new_js_ctx_scheduling_mode);
+	if (ret || new_js_ctx_scheduling_mode >= KBASE_JS_PRIORITY_MODE_COUNT) {
+		dev_err(kbdev->dev, "Couldn't process js_ctx_scheduling_mode"
+				" write operation.\n"
+				"Use format <js_ctx_scheduling_mode>\n");
+		return -EINVAL;
+	}
+
+	if (new_js_ctx_scheduling_mode == kbdev->js_ctx_scheduling_mode)
+		return count;
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Update the context priority mode */
+	kbdev->js_ctx_scheduling_mode = new_js_ctx_scheduling_mode;
+
+	/* Adjust priority of all the contexts as per the new mode */
+	list_for_each_entry(element, &kbdev->kctx_list, link)
+		kbase_js_update_ctx_priority(element->kctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	dev_dbg(kbdev->dev, "JS ctx scheduling mode: %u\n", new_js_ctx_scheduling_mode);
+
+	return count;
+}
+
+static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR,
+		show_js_ctx_scheduling_mode,
+		set_js_ctx_scheduling_mode);
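
The attribute above exposes kbdev->js_ctx_scheduling_mode as a plain unsigned integer in sysfs. A small user-space sketch that selects mode 1; the path is an assumption based on the usual kbase misc-device layout (mali0 with a device symlink) and may differ on a given platform:

#include <stdio.h>

int main(void)
{
	/* Assumed path; adjust for the actual device on the target system. */
	const char *path =
		"/sys/class/misc/mali0/device/js_ctx_scheduling_mode";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("1\n", f);	/* parsed by kstrtouint() in the handler above */
	fclose(f);
	return 0;
}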
 #ifdef CONFIG_DEBUG_FS
 
 /* Number of entries in serialize_jobs_settings[] */
@@ -2996,6 +3115,7 @@
 		kbdev->reg_start = reg_res->start;
 		kbdev->reg_size = resource_size(reg_res);
 
+
 		err = kbase_common_reg_map(kbdev);
 		if (err) {
 			dev_err(kbdev->dev, "Failed to map registers\n");
@@ -3111,6 +3231,7 @@
 #endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
 }
 
+#ifdef MALI_KBASE_BUILD
 #ifdef CONFIG_DEBUG_FS
 
 #if KBASE_GPU_RESET_EN
@@ -3171,7 +3292,7 @@
 	ssize_t ret_val;
 
 	kbase_pm_context_active(kbdev);
-	gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL);
+	gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS));
 	kbase_pm_context_idle(kbdev);
 
 	if (gpu_status & GPU_DBGEN)
@@ -3265,10 +3386,6 @@
 	kbasep_trace_debugfs_init(kbdev);
 #endif /* KBASE_TRACE_ENABLE */
 
-#ifdef CONFIG_MALI_TRACE_TIMELINE
-	kbasep_trace_timeline_debugfs_init(kbdev);
-#endif /* CONFIG_MALI_TRACE_TIMELINE */
-
 #ifdef CONFIG_MALI_DEVFREQ
 #ifdef CONFIG_DEVFREQ_THERMAL
 	if (kbdev->inited_subsys & inited_devfreq)
@@ -3302,6 +3419,7 @@
 
 static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
 #endif /* CONFIG_DEBUG_FS */
+#endif /* MALI_KBASE_BUILD */
 
 static void kbase_device_coherency_init(struct kbase_device *kbdev,
 		unsigned prod_id)
@@ -3390,12 +3508,12 @@
 	&dev_attr_reset_timeout.attr,
 	&dev_attr_js_scheduling_period.attr,
 	&dev_attr_power_policy.attr,
-	&dev_attr_core_availability_policy.attr,
 	&dev_attr_core_mask.attr,
 	&dev_attr_mem_pool_size.attr,
 	&dev_attr_mem_pool_max_size.attr,
 	&dev_attr_lp_mem_pool_size.attr,
 	&dev_attr_lp_mem_pool_max_size.attr,
+	&dev_attr_js_ctx_scheduling_mode.attr,
 	NULL
 };
 
@@ -3443,10 +3561,12 @@
 		kbdev->inited_subsys &= ~inited_get_device;
 	}
 
+#ifdef MALI_KBASE_BUILD
 	if (kbdev->inited_subsys & inited_debugfs) {
 		kbase_device_debugfs_term(kbdev);
 		kbdev->inited_subsys &= ~inited_debugfs;
 	}
+#endif
 
 	if (kbdev->inited_subsys & inited_job_fault) {
 		kbase_debug_job_fault_dev_term(kbdev);
@@ -3460,6 +3580,7 @@
 	}
 #endif
 
+
 	if (kbdev->inited_subsys & inited_vinstr) {
 		kbase_vinstr_term(kbdev->vinstr_ctx);
 		kbdev->inited_subsys &= ~inited_vinstr;
@@ -3713,6 +3834,7 @@
 	}
 	kbdev->inited_subsys |= inited_vinstr;
 
+
 #ifdef CONFIG_MALI_DEVFREQ
 	/* Devfreq uses vinstr, so must be initialized after it. */
 	err = kbase_devfreq_init(kbdev);
@@ -3722,6 +3844,7 @@
 		dev_err(kbdev->dev, "Continuing without devfreq\n");
 #endif /* CONFIG_MALI_DEVFREQ */
 
+#ifdef MALI_KBASE_BUILD
 	err = kbase_debug_job_fault_dev_init(kbdev);
 	if (err) {
 		dev_err(kbdev->dev, "Job fault debug initialization failed\n");
@@ -3742,6 +3865,7 @@
 	kbdev->mdev.name = kbdev->devname;
 	kbdev->mdev.fops = &kbase_fops;
 	kbdev->mdev.parent = get_device(kbdev->dev);
+	kbdev->mdev.mode = 0666;
 	kbdev->inited_subsys |= inited_get_device;
 
 	/* This needs to happen before registering the device with misc_register(),
@@ -3799,6 +3923,7 @@
 			"Probed as %s\n", dev_name(kbdev->mdev.this_device));
 
 	kbase_dev_nr++;
+#endif /* MALI_KBASE_BUILD */
 
 	return err;
 }
diff --git a/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
index 85a6afd..bda0560 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -121,7 +121,8 @@
 
 				kctx->as_nr = free_as;
 				kbdev->as_to_kctx[free_as] = kctx;
-				kbase_mmu_update(kctx);
+				kbase_mmu_update(kbdev, &kctx->mmu,
+					kctx->as_nr);
 			}
 		} else {
 			atomic_dec(&kctx->refcount);
@@ -193,7 +194,8 @@
 			if (atomic_read(&kctx->refcount)) {
 				WARN_ON(kctx->as_nr != i);
 
-				kbase_mmu_update(kctx);
+				kbase_mmu_update(kbdev, &kctx->mmu,
+					kctx->as_nr);
 			} else {
 				/* This context might have been assigned an
 				 * AS before, clear it.
diff --git a/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h
index 400ee62..ab57a0d 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -25,7 +25,8 @@
 
 #include <mali_kbase.h>
 
-/* The Context Scheduler manages address space assignment and reference
+/**
+ * The Context Scheduler manages address space assignment and reference
  * counting to kbase_context. The interface has been designed to minimise
  * interactions between the Job Scheduler and Power Management/MMU to support
  * the existing Job Scheduler interface.
@@ -39,35 +40,30 @@
  * code.
  */
 
-/* base_ctx_sched_init - Initialise the context scheduler
+/**
+ * kbase_ctx_sched_init - Initialise the context scheduler
+ * @kbdev: The device for which the context scheduler needs to be initialised
  *
- * @kbdev: The device for which the context scheduler needs to be
- *         initialised
+ * This must be called during device initialisation. The number of hardware
+ * address spaces must already be established before calling this function.
  *
  * Return: 0 for success, otherwise failure
- *
- * This must be called during device initilisation. The number of hardware
- * address spaces must already be established before calling this function.
  */
 int kbase_ctx_sched_init(struct kbase_device *kbdev);
 
-/* base_ctx_sched_term - Terminate the context scheduler
- *
- * @kbdev: The device for which the context scheduler needs to be
- *         terminated
+/**
+ * kbase_ctx_sched_term - Terminate the context scheduler
+ * @kbdev: The device for which the context scheduler needs to be terminated
  *
  * This must be called during device termination after all contexts have been
  * destroyed.
  */
 void kbase_ctx_sched_term(struct kbase_device *kbdev);
 
-/* kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context
- *
+/**
+ * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context
  * @kctx: The context to which to retain a reference
  *
- * Return: The address space that the context has been assigned to or
- *         KBASEP_AS_NR_INVALID if no address space was available.
- *
  * This function should be called whenever an address space should be assigned
  * to a context and programmed onto the MMU. It should typically be called
  * when jobs are ready to be submitted to the GPU.
@@ -77,11 +73,14 @@
  *
  * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
  * held whilst calling this function.
+ *
+ * Return: The address space that the context has been assigned to or
+ *         KBASEP_AS_NR_INVALID if no address space was available.
  */
 int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx);
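
A minimal sketch of the calling convention documented above for kbase_ctx_sched_retain_ctx(): both locks held across the call, and the KBASEP_AS_NR_INVALID result handled. This is illustrative only, not a code path lifted from the driver:

#include <mali_kbase.h>
#include <mali_kbase_ctx_sched.h>

static int example_retain_as(struct kbase_device *kbdev,
			     struct kbase_context *kctx)
{
	unsigned long flags;
	int as_nr;

	mutex_lock(&kbdev->mmu_hw_mutex);
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

	as_nr = kbase_ctx_sched_retain_ctx(kctx);

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
	mutex_unlock(&kbdev->mmu_hw_mutex);

	if (as_nr == KBASEP_AS_NR_INVALID)
		return -EBUSY;	/* no address space free right now */

	/* ... submit work for kctx, then drop the reference with
	 * kbase_ctx_sched_release_ctx() under its own locking rules ...
	 */
	return 0;
}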
 
-/* kbase_ctx_sched_retain_ctx_refcount
- *
+/**
+ * kbase_ctx_sched_retain_ctx_refcount
  * @kctx: The context to which to retain a reference
  *
  * This function only retains a reference to the context. It must be called
@@ -95,8 +94,8 @@
  */
 void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx);
 
-/* kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context
- *
+/**
+ * kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context
  * @kctx: The context from which to release a reference
  *
  * This function should be called whenever an address space could be unassigned
@@ -108,8 +107,8 @@
  */
 void kbase_ctx_sched_release_ctx(struct kbase_context *kctx);
 
-/* kbase_ctx_sched_remove_ctx - Unassign previously assigned address space
- *
+/**
+ * kbase_ctx_sched_remove_ctx - Unassign previously assigned address space
  * @kctx: The context to be removed
  *
  * This function should be called when a context is being destroyed. The
@@ -121,8 +120,8 @@
  */
 void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx);
 
-/* kbase_ctx_sched_restore_all_as - Reprogram all address spaces
- *
+/**
+ * kbase_ctx_sched_restore_all_as - Reprogram all address spaces
  * @kbdev: The device for which address spaces to be reprogrammed
  *
  * This function shall reprogram all address spaces previously assigned to
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
index d2c57ca..ee45529 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -130,7 +130,7 @@
 	if (!(map->flags & KBASE_REG_CPU_CACHED))
 		prot = pgprot_writecombine(prot);
 
-	page = phys_to_page(as_phys_addr_t(map->alloc->pages[data->offset]));
+	page = as_page(map->alloc->pages[data->offset]);
 	mapping = vmap(&page, 1, VM_MAP, prot);
 	if (!mapping)
 		goto out;
@@ -223,12 +223,6 @@
 		goto out;
 	}
 
-	ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data);
-	if (0 != ret) {
-		kbase_gpu_vm_unlock(kctx);
-		goto out;
-	}
-
 	ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data);
 	if (0 != ret) {
 		kbase_gpu_vm_unlock(kctx);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h
index 4bb3442..4adfe35 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_defs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -42,6 +42,7 @@
 #include <mali_kbase_gpuprops_types.h>
 #include <protected_mode_switcher.h>
 
+
 #include <linux/atomic.h>
 #include <linux/mempool.h>
 #include <linux/slab.h>
@@ -52,7 +53,6 @@
 #include <linux/bus_logger.h>
 #endif
 
-
 #if defined(CONFIG_SYNC)
 #include <sync.h>
 #else
@@ -147,11 +147,7 @@
 
 #define MIDGARD_MMU_LEVEL(x) (x)
 
-#if MIDGARD_MMU_VA_BITS > 39
 #define MIDGARD_MMU_TOPLEVEL    MIDGARD_MMU_LEVEL(0)
-#else
-#define MIDGARD_MMU_TOPLEVEL    MIDGARD_MMU_LEVEL(1)
-#endif
 
 #define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3)
 
@@ -175,10 +171,20 @@
 /* Maximum force replay limit when randomization is enabled */
 #define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
 
+/* Maximum number of pages of memory that require a permanent mapping, per
+ * kbase_context
+ */
+#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((1024ul * 1024ul) >> \
+								PAGE_SHIFT)
+
+
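As a quick check of the new limit: with 4 KiB pages (PAGE_SHIFT = 12), (1024ul * 1024ul) >> 12 evaluates to 256 pages per context, i.e. 1 MiB of permanently mapped memory.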
 /** Atom has been previously soft-stoppped */
 #define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
 /** Atom has been previously retried to execute */
 #define KBASE_KATOM_FLAGS_RERUN (1<<2)
+/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps to
+ * disambiguate short-running job chains during soft/hard stopping of jobs
+ */
 #define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
 /** Atom has been previously hard-stopped. */
 #define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
@@ -196,6 +202,8 @@
 #define KBASE_KATOM_FLAG_PROTECTED (1<<11)
 /* Atom has been stored in runnable_tree */
 #define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12)
+/* Atom is waiting for L2 caches to power up in order to enter protected mode */
+#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13)
 
 /* SW related flags about types of JS_COMMAND action
  * NOTE: These must be masked off by JS_COMMAND_MASK */
@@ -229,8 +237,27 @@
 struct kbase_device;
 struct kbase_as;
 struct kbase_mmu_setup;
+struct kbase_ipa_model_vinstr_data;
 
 #ifdef CONFIG_DEBUG_FS
+/**
+ * struct base_job_fault_event - keeps track of the atom which faulted or which
+ *                               completed after the faulty atom but before the
+ *                               debug data for faulty atom was dumped.
+ *
+ * @event_code:     event code for the atom, should != BASE_JD_EVENT_DONE for the
+ *                  atom which faulted.
+ * @katom:          pointer to the atom for which job fault occurred or which completed
+ *                  after the faulty atom.
+ * @job_fault_work: work item, queued only for the faulty atom, which waits for
+ *                  the dumping to get completed and then does the bottom half
+ *                  of job done for the atoms which followed the faulty atom.
+ * @head:           List head used to store the atom in the global list of faulty
+ *                  atoms or context specific list of atoms which got completed
+ *                  during the dump.
+ * @reg_offset:     offset of the register to be dumped next, only applicable for
+ *                  the faulty atom.
+ */
 struct base_job_fault_event {
 
 	u32 event_code;
@@ -242,6 +269,12 @@
 
 #endif
 
+/**
+ * struct kbase_jd_atom_dependency - Contains the dependency info for an atom.
+ * @atom:          pointer to the dependee atom.
+ * @dep_type:      type of dependency on the dependee @atom, i.e. order or data
+ *                 dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency.
+ */
 struct kbase_jd_atom_dependency {
 	struct kbase_jd_atom *atom;
 	u8 dep_type;
@@ -281,14 +314,14 @@
 };
 
 /**
- * @brief The function retrieves a read-only reference to the atom field from
- * the  kbase_jd_atom_dependency structure
+ * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the
+ *                           dependee atom.
+ * @dep:   pointer to the dependency info structure.
  *
- * @param[in] dep kbase jd atom dependency.
- *
- * @return readonly reference to dependent ATOM.
+ * Return: readonly reference to dependee atom.
  */
-static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+static inline const struct kbase_jd_atom *
+kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
 {
 	LOCAL_ASSERT(dep != NULL);
 
@@ -296,12 +329,11 @@
 }
 
 /**
- * @brief The function retrieves a read-only reference to the dependency type field from
- * the  kbase_jd_atom_dependency structure
+ * kbase_jd_katom_dep_type - Retrieves the dependency type info
  *
- * @param[in] dep kbase jd atom dependency.
+ * @dep:   pointer to the dependency info structure.
  *
- * @return A dependency type value.
+ * Return: the type of dependency there is on the dependee atom.
  */
 static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
 {
@@ -311,12 +343,11 @@
 }
 
 /**
- * @brief Setter macro for dep_atom array entry in kbase_jd_atom
- *
- * @param[in] dep    The kbase jd atom dependency.
- * @param[in] a      The ATOM to be set as a dependency.
- * @param     type   The ATOM dependency type to be set.
- *
+ * kbase_jd_katom_dep_set - sets up the dependency info structure
+ *                          as per the values passed.
+ * @const_dep:    pointer to the dependency info structure to be set up.
+ * @a:            pointer to the dependee atom.
+ * @type:         type of dependency there is on the dependee atom.
  */
 static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
 		struct kbase_jd_atom *a, u8 type)
@@ -332,10 +363,9 @@
 }
 
 /**
- * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ * kbase_jd_katom_dep_clear - resets the dependency info structure
  *
- * @param[in] dep    The kbase jd atom dependency to be cleared.
- *
+ * @const_dep:    pointer to the dependency info structure to be cleared.
  */
 static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
 {
@@ -349,74 +379,216 @@
 	dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
 }
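
A short illustrative use of the four accessors above: record an order dependency of one atom on another in dependency slot 0, read it back, then clear it. BASE_JD_DEP_TYPE_ORDER is assumed from the base UAPI dependency types alongside BASE_JD_DEP_TYPE_INVALID; nothing here is lifted from a real driver path:

#include <mali_kbase.h>

static void example_dep_usage(struct kbase_jd_atom *child,
			      struct kbase_jd_atom *parent)
{
	/* child->dep[0] now says: "ordered after parent". */
	kbase_jd_katom_dep_set(&child->dep[0], parent, BASE_JD_DEP_TYPE_ORDER);

	if (kbase_jd_katom_dep_atom(&child->dep[0]) == parent &&
	    kbase_jd_katom_dep_type(&child->dep[0]) == BASE_JD_DEP_TYPE_ORDER)
		kbase_jd_katom_dep_clear(&child->dep[0]);
}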
 
+/**
+ * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it becomes
+ *                                runnable, with respect to job slot ringbuffer/fifo.
+ * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, which
+ *                                implies that either atom has not become runnable
+ *                                due to dependency or has completed the execution
+ *                                on GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is blocked
+ *                                due to cross slot dependency, can't be submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot fifo but
+ *                                is waiting for the completion of previously added atoms
+ *                                in current & other slots, as their protected mode
+ *                                requirements do not match with the current atom.
+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo and is
+ *                                waiting for completion of protected mode transition,
+ *                                needed before the atom is submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is waiting
+ *                                for the cores, which are needed to execute the job
+ *                                chain represented by the atom, to become available
+ * @KBASE_ATOM_GPU_RB_WAITING_AFFINITY: Atom is in slot fifo but is blocked on
+ *                                affinity due to rmu workaround for Hw issue 8987.
+ * @KBASE_ATOM_GPU_RB_READY:      Atom is in slot fifo and can be submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_SUBMITTED:  Atom is in slot fifo and has been submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some failure,
+ *                                but only after the previously added atoms in fifo
+ *                                have completed or have also been returned to JS.
+ */
 enum kbase_atom_gpu_rb_state {
-	/* Atom is not currently present in slot ringbuffer */
 	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
-	/* Atom is in slot ringbuffer but is blocked on a previous atom */
 	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
-	/* Atom is in slot ringbuffer but is waiting for a previous protected
-	 * mode transition to complete */
 	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV,
-	/* Atom is in slot ringbuffer but is waiting for proected mode
-	 * transition */
 	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION,
-	/* Atom is in slot ringbuffer but is waiting for cores to become
-	 * available */
 	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
-	/* Atom is in slot ringbuffer but is blocked on affinity */
 	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
-	/* Atom is in slot ringbuffer and ready to run */
 	KBASE_ATOM_GPU_RB_READY,
-	/* Atom is in slot ringbuffer and has been submitted to the GPU */
 	KBASE_ATOM_GPU_RB_SUBMITTED,
-	/* Atom must be returned to JS as soon as it reaches the head of the
-	 * ringbuffer due to a previous failure */
 	KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1
 };
 
+/**
+ * enum kbase_atom_enter_protected_state - The state of an atom with respect to the
+ *                      preparation for GPU's entry into protected mode, becomes
+ *                      pertinent only after atom's state with respect to slot
+ *                      ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @KBASE_ATOM_ENTER_PROTECTED_CHECK:  Starting state. Check if there are any atoms
+ *                      currently submitted to GPU and protected mode transition is
+ *                      not already in progress.
+ * @KBASE_ATOM_ENTER_PROTECTED_VINSTR: Wait for vinstr to suspend before entry into
+ *                      protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation
+ *                      for the coherency change. L2 shall be powered down and GPU shall
+ *                      come out of fully coherent mode before entering protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change;
+ *                      for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on so that
+ *                      coherency register contains correct value when GPU enters
+ *                      protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for BASE_HW_ISSUE_TGOX_R1_1234 check
+ *                      that L2 is powered up and switch GPU to protected mode.
+ */
 enum kbase_atom_enter_protected_state {
-	/*
-	 * Starting state:
-	 * Check if a transition into protected mode is required.
-	 *
-	 * NOTE: The integer value of this must
-	 *       match KBASE_ATOM_EXIT_PROTECTED_CHECK.
+	/**
+	 * NOTE: The integer value of this must match KBASE_ATOM_EXIT_PROTECTED_CHECK.
 	 */
 	KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
-	/* Wait for vinstr to suspend. */
 	KBASE_ATOM_ENTER_PROTECTED_VINSTR,
-	/* Wait for the L2 to become idle in preparation for
-	 * the coherency change. */
 	KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
-	/* End state;
-	 * Prepare coherency change. */
+	KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY,
 	KBASE_ATOM_ENTER_PROTECTED_FINISHED,
 };
 
+/**
+ * enum kbase_atom_exit_protected_state - The state of an atom with respect to the
+ *                      preparation for GPU's exit from protected mode, becomes
+ *                      pertinent only after atom's state with respect to slot
+ *                      ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any atoms
+ *                      currently submitted to GPU and protected mode transition is
+ *                      not already in progress.
+ * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation
+ *                      for the reset, as exiting protected mode requires a reset.
+ * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from protected mode
+ * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state; wait for the reset to complete
+ */
 enum kbase_atom_exit_protected_state {
-	/*
-	 * Starting state:
-	 * Check if a transition out of protected mode is required.
-	 *
-	 * NOTE: The integer value of this must
-	 *       match KBASE_ATOM_ENTER_PROTECTED_CHECK.
+	/**
+	 * NOTE: The integer value of this must match KBASE_ATOM_ENTER_PROTECTED_CHECK.
 	 */
 	KBASE_ATOM_EXIT_PROTECTED_CHECK = 0,
-	/* Wait for the L2 to become idle in preparation
-	 * for the reset. */
 	KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
-	/* Issue the protected reset. */
 	KBASE_ATOM_EXIT_PROTECTED_RESET,
-	/* End state;
-	 * Wait for the reset to complete. */
 	KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
 };
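
Both state machines deliberately start at integer value 0 (see the NOTE in each enum), so the union they share inside struct kbase_jd_atom can be reset through either member and remain valid for whichever direction the atom transitions next. A tiny sketch of that reset, assuming the union member name protected_state from the structure documentation below:

#include <mali_kbase.h>

/* Put the atom back into its CHECK state; because the CHECK values of the
 * enter and exit enums are both 0, writing either member resets both views.
 */
static void example_reset_protected_state(struct kbase_jd_atom *katom)
{
	katom->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_CHECK;
}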
 
+/**
+ * struct kbase_ext_res - Contains the info for external resources referred
+ *                        by an atom, which have been mapped on GPU side.
+ * @gpu_address:          Start address of the memory region allocated for
+ *                        the resource from GPU virtual address space.
+ * @alloc:                pointer to physical pages tracking object, set on
+ *                        mapping the external resource on GPU side.
+ */
 struct kbase_ext_res {
 	u64 gpu_address;
 	struct kbase_mem_phy_alloc *alloc;
 };
 
+/**
+ * struct kbase_jd_atom  - object representing the atom, containing the complete
+ *                         state and attributes of an atom.
+ * @work:                  work item for the bottom half processing of the atom,
+ *                         by JD or JS, after it got executed on GPU or the input
+ *                         fence got signaled
+ * @start_timestamp:       time at which the atom was submitted to the GPU, by
+ *                         updating the JS_HEAD_NEXTn register.
+ * @udata:                 copy of the user data sent for the atom in base_jd_submit.
+ * @kctx:                  Pointer to the base context with which the atom is associated.
+ * @dep_head:              Array of 2 list heads, pointing to the two list of atoms
+ *                         which are blocked due to dependency on this atom.
+ * @dep_item:              Array of 2 list heads, used to store the atom in the list of
+ *                         other atoms depending on the same dependee atom.
+ * @dep:                   Array containing the dependency info for the 2 atoms on which
+ *                         the atom depends upon.
+ * @jd_item:               List head used during job dispatch job_done processing - as
+ *                         dependencies may not be entirely resolved at this point,
+ *                         we need to use a separate list head.
+ * @in_jd_list:            flag set to true if atom's @jd_item is currently on a list,
+ *                         prevents atom being processed twice.
+ * @nr_extres:             number of external resources referenced by the atom.
+ * @extres:                pointer to the location containing info about @nr_extres
+ *                         external resources referenced by the atom.
+ * @device_nr:             indicates the coregroup with which the atom is associated,
+ *                         when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified.
+ * @jc:                    GPU address of the job-chain.
+ * @softjob_data:          Copy of data read from the user space buffer that @jc
+ *                         points to.
+ * @coreref_state:         state of the atom with respect to retention of shader
+ *                         cores for affinity & power management.
+ * @fence:                 Stores either an input or output sync fence, depending
+ *                         on soft-job type
+ * @sync_waiter:           Pointer to the sync fence waiter structure passed to the
+ *                         callback function on signaling of the input fence.
+ * @dma_fence:             object containing pointers to both input & output fences
+ *                         and other related members used for explicit sync through
+ *                         soft jobs and for the implicit synchronization required
+ *                         on access to external resources.
+ * @event_code:            Event code for the job chain represented by the atom, both
+ *                         HW and low-level SW events are represented by event codes.
+ * @core_req:              bitmask of BASE_JD_REQ_* flags specifying either Hw or Sw
+ *                         requirements for the job chain represented by the atom.
+ * @ticks:                 Number of scheduling ticks for which atom has been running
+ *                         on the GPU.
+ * @sched_priority:        Priority of the atom for Job scheduling, as per the
+ *                         KBASE_JS_ATOM_SCHED_PRIO_*.
+ * @poking:                Indicates whether poking of MMU is ongoing for the atom,
+ *                         as a WA for the issue HW_ISSUE_8316.
+ * @completed:             Wait queue to wait upon for the completion of atom.
+ * @status:                Indicates at a high level what stage the atom is in,
+ *                         as per KBASE_JD_ATOM_STATE_*, i.e. whether it is not
+ *                         in use, is queued in JD, given to JS, submitted to Hw
+ *                         or has completed execution on Hw.
+ * @work_id:               used for GPU tracepoints, it is a snapshot of the 'work_id'
+ *                         counter in kbase_jd_context which is incremented on
+ *                         every call to base_jd_submit.
+ * @slot_nr:               Job slot chosen for the atom.
+ * @atom_flags:            bitmask of KBASE_KATOM_FLAG* flags capturing the exact
+ *                         low level state of the atom.
+ * @retry_count:           Number of times this atom has been retried. Used by replay
+ *                         soft job.
+ * @gpu_rb_state:          bitmask of KBASE_ATOM_GPU_RB_* flags, precisely tracking
+ *                         atom's state after it has entered Job scheduler on becoming
+ *                         runnable. Atom could be blocked due to cross slot dependency
+ *                         or waiting for the shader cores to become available or
+ *                         waiting for protected mode transitions to complete.
+ * @need_cache_flush_cores_retained: flag indicating that manual flush of GPU
+ *                         cache is needed for the atom and the shader cores used
+ *                         for the atom have been kept on.
+ * @blocked:               flag indicating that atom's resubmission to GPU is
+ *                         blocked till the work item is scheduled to return the
+ *                         atom to JS.
+ * @pre_dep:               Pointer to atom that this atom has same-slot dependency on
+ * @post_dep:              Pointer to atom that has same-slot dependency on this atom
+ * @x_pre_dep:             Pointer to atom that this atom has cross-slot dependency on
+ * @x_post_dep:            Pointer to atom that has cross-slot dependency on this atom
+ * @flush_id:              The GPU's flush count recorded at the time of submission,
+ *                         used for the cache flush optimisation
+ * @fault_event:           Info for dumping the debug data on Job fault.
+ * @queue:                 List head used for 4 different purposes:
+ *                         Adds atom to the list of dma-buf fence waiting atoms.
+ *                         Adds atom to the list of atoms blocked due to cross
+ *                         slot dependency.
+ *                         Adds atom to the list of softjob atoms for which JIT
+ *                         allocation has been deferred.
+ *                         Adds atom to the list of softjob atoms waiting for the
+ *                         signaling of fence.
+ * @jit_node:              Used to keep track of all JIT free/alloc jobs in submission order
+ * @jit_blocked:           Flag indicating that JIT allocation requested through
+ *                         softjob atom will be reattempted after the impending
+ *                         free of other active JIT allocations.
+ * @will_fail_event_code:  If non-zero, this indicates that the atom will fail
+ *                         with the set event_code when the atom is processed.
+ *                         Used for special handling of atoms, which have a data
+ *                         dependency on the failed atoms.
+ * @protected_state:       State of the atom, as per KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*,
+ *                         when transitioning into or out of protected mode. Atom will
+ *                         be either entering or exiting the protected mode.
+ * @runnable_tree_node:    The node added to context's job slot specific rb tree
+ *                         when the atom becomes runnable.
+ * @age:                   Age of atom relative to other atoms in the context, is
+ *                         snapshot of the age_count counter in kbase context.
+ */
 struct kbase_jd_atom {
 	struct work_struct work;
 	ktime_t start_timestamp;
@@ -427,25 +599,17 @@
 	struct list_head dep_head[2];
 	struct list_head dep_item[2];
 	const struct kbase_jd_atom_dependency dep[2];
-	/* List head used during job dispatch job_done processing - as
-	 * dependencies may not be entirely resolved at this point, we need to
-	 * use a separate list head. */
 	struct list_head jd_item;
-	/* true if atom's jd_item is currently on a list. Prevents atom being
-	 * processed twice. */
 	bool in_jd_list;
 
 	u16 nr_extres;
 	struct kbase_ext_res *extres;
 
 	u32 device_nr;
-	u64 affinity;
 	u64 jc;
-	/* Copy of data read from the user space buffer that jc points to */
 	void *softjob_data;
 	enum kbase_atom_coreref_state coreref_state;
 #if defined(CONFIG_SYNC)
-	/* Stores either an input or output fence, depending on soft-job type */
 	struct sync_fence *fence;
 	struct sync_fence_waiter sync_waiter;
 #endif				/* CONFIG_SYNC */
@@ -519,73 +683,49 @@
 
 	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
 	enum base_jd_event_code event_code;
-	base_jd_core_req core_req;	    /**< core requirements */
+	base_jd_core_req core_req;
 
 	u32 ticks;
-	/* JS atom priority with respect to other atoms on its kctx. */
 	int sched_priority;
 
-	int poking;		/* BASE_HW_ISSUE_8316 */
+	int poking;
 
 	wait_queue_head_t completed;
 	enum kbase_jd_atom_state status;
 #ifdef CONFIG_GPU_TRACEPOINTS
 	int work_id;
 #endif
-	/* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
 	int slot_nr;
 
 	u32 atom_flags;
 
-	/* Number of times this atom has been retried. Used by replay soft job.
-	 */
 	int retry_count;
 
 	enum kbase_atom_gpu_rb_state gpu_rb_state;
 
-	u64 need_cache_flush_cores_retained;
+	bool need_cache_flush_cores_retained;
 
 	atomic_t blocked;
 
-	/* Pointer to atom that this atom has same-slot dependency on */
 	struct kbase_jd_atom *pre_dep;
-	/* Pointer to atom that has same-slot dependency on this atom */
 	struct kbase_jd_atom *post_dep;
 
-	/* Pointer to atom that this atom has cross-slot dependency on */
 	struct kbase_jd_atom *x_pre_dep;
-	/* Pointer to atom that has cross-slot dependency on this atom */
 	struct kbase_jd_atom *x_post_dep;
 
-	/* The GPU's flush count recorded at the time of submission, used for
-	 * the cache flush optimisation */
 	u32 flush_id;
 
-	struct kbase_jd_atom_backend backend;
 #ifdef CONFIG_DEBUG_FS
 	struct base_job_fault_event fault_event;
 #endif
 
-	/* List head used for three different purposes:
-	 *  1. Overflow list for JS ring buffers. If an atom is ready to run,
-	 *     but there is no room in the JS ring buffer, then the atom is put
-	 *     on the ring buffer's overflow list using this list node.
-	 *  2. List of waiting soft jobs.
-	 */
 	struct list_head queue;
 
-	/* Used to keep track of all JIT free/alloc jobs in submission order
-	 */
 	struct list_head jit_node;
 	bool jit_blocked;
 
-	/* If non-zero, this indicates that the atom will fail with the set
-	 * event_code when the atom is processed. */
 	enum base_jd_event_code will_fail_event_code;
 
-	/* Atoms will only ever be transitioning into, or out of
-	 * protected mode so we do not need two separate fields.
-	 */
 	union {
 		enum kbase_atom_enter_protected_state enter;
 		enum kbase_atom_exit_protected_state exit;
@@ -593,10 +733,33 @@
 
 	struct rb_node runnable_tree_node;
 
-	/* 'Age' of atom relative to other atoms in the context. */
 	u32 age;
 };
 
+/**
+ * struct kbase_debug_copy_buffer - information about the buffer to be copied.
+ *
+ * @size:	size of the buffer in bytes
+ * @pages:	pointer to an array of pointers to the pages which contain
+ *		the buffer
+ * @nr_pages:	number of pages
+ * @offset:	offset into the pages
+ * @gpu_alloc:	pointer to physical memory allocated by the GPU
+ * @extres_pages: array of pointers to the pages containing external resources
+ *		for this buffer
+ * @nr_extres_pages: number of pages in @extres_pages
+ */
+struct kbase_debug_copy_buffer {
+	size_t size;
+	struct page **pages;
+	int nr_pages;
+	size_t offset;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+
+	struct page **extres_pages;
+	int nr_extres_pages;
+};
+
 static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom)
 {
 	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
@@ -612,38 +775,57 @@
 
 #define KBASE_JD_DEP_QUEUE_SIZE 256
 
+/**
+ * struct kbase_jd_context  - per context object encapsulating all the Job dispatcher
+ *                            related state.
+ * @lock:                     lock to serialize the updates made to the Job dispatcher
+ *                            state and kbase_jd_atom objects.
+ * @sched_info:               Structure encapsulating all the Job scheduling info.
+ * @atoms:                    Array of the objects representing atoms, containing
+ *                            the complete state and attributes of an atom.
+ * @job_nr:                   Tracks the number of atoms being processed by the
+ *                            kbase. This includes atoms that are not tracked by
+ *                            scheduler: 'not ready to run' & 'dependency-only' jobs.
+ * @zero_jobs_wait:           Waitq that reflects whether there are no jobs
+ *                            (including SW-only dependency jobs). This is set
+ *                            when no jobs are present on the ctx, and clear when
+ *                            there are jobs.
+ *                            This must be updated atomically with @job_nr.
+ *                            note: Job Dispatcher knows about more jobs than the
+ *                            Job Scheduler, as the latter is unaware of jobs
+ *                            that are blocked on dependencies and of SW-only
+ *                            dependency jobs.
+ *                            This waitq can be waited upon to find out when the
+ *                            context jobs are all done/cancelled (including those
+ *                            that might've been blocked on dependencies) - and so,
+ *                            whether it can be terminated. However, it should only
+ *                            be terminated once it is not present in the run-pool.
+ *                            Since the waitq is only set under @lock, the waiter
+ *                            should also briefly obtain and drop @lock to guarantee
+ *                            that the setter has completed its work on the kbase_context
+ * @job_done_wq:              Workqueue to which the per atom work item is queued
+ *                            for bottom half processing when the atom completes
+ *                            execution on GPU or the input fence gets signaled.
+ * @tb_lock:                  Lock to serialize the write access made to @tb to
+ *                            store the register access trace messages.
+ * @tb:                       Pointer to the Userspace accessible buffer storing
+ *                            the trace messages for register read/write accesses
+ *                            made by the Kbase. The buffer is filled in circular
+ *                            fashion.
+ * @tb_wrap_offset:           Offset to the end location in the trace buffer, the
+ *                            write pointer is moved to the beginning on reaching
+ *                            this offset.
+ * @work_id:                  atomic variable used for GPU tracepoints, incremented
+ *                            on every call to base_jd_submit.
+ */
 struct kbase_jd_context {
 	struct mutex lock;
 	struct kbasep_js_kctx_info sched_info;
 	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
 
-	/** Tracks all job-dispatch jobs.  This includes those not tracked by
-	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
 	u32 job_nr;
 
-	/** Waitq that reflects whether there are no jobs (including SW-only
-	 * dependency jobs). This is set when no jobs are present on the ctx,
-	 * and clear when there are jobs.
-	 *
-	 * @note: Job Dispatcher knows about more jobs than the Job Scheduler:
-	 * the Job Scheduler is unaware of jobs that are blocked on dependencies,
-	 * and SW-only dependency jobs.
-	 *
-	 * This waitq can be waited upon to find out when the context jobs are all
-	 * done/cancelled (including those that might've been blocked on
-	 * dependencies) - and so, whether it can be terminated. However, it should
-	 * only be terminated once it is not present in the run-pool (see
-	 * kbasep_js_kctx_info::ctx::is_scheduled).
-	 *
-	 * Since the waitq is only set under kbase_jd_context::lock,
-	 * the waiter should also briefly obtain and drop kbase_jd_context::lock to
-	 * guarentee that the setter has completed its work on the kbase_context
-	 *
-	 * This must be updated atomically with:
-	 * - kbase_jd_context::job_nr */
 	wait_queue_head_t zero_jobs_wait;
 
-	/** Job Done workqueue. */
 	struct workqueue_struct *job_done_wq;
 
 	spinlock_t tb_lock;
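The @zero_jobs_wait/@job_nr description above implies a small protocol for anyone waiting for a context to drain. A hedged sketch of such a waiter (illustrative only; the function name is made up and the code mirrors the comment rather than the driver's actual implementation):

/* Illustrative waiter: block until @job_nr reaches zero, then briefly
 * take and drop @lock so the setter of @zero_jobs_wait is known to have
 * finished touching the kbase_context.
 */
static void example_wait_ctx_idle(struct kbase_context *kctx)
{
	wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0);
	mutex_lock(&kctx->jctx.lock);
	mutex_unlock(&kctx->jctx.lock);
}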
@@ -675,15 +857,34 @@
 };
 
 /**
- * Important: Our code makes assumptions that a struct kbase_as structure is always at
- * kbase_device->as[number]. This is used to recover the containing
- * struct kbase_device from a struct kbase_as structure.
- *
- * Therefore, struct kbase_as structures must not be allocated anywhere else.
+ * struct kbase_as   - object representing an address space of GPU.
+ * @number:            Index at which this address space structure is present
+ *                     in an array of address space structures embedded inside the
+ *                     struct kbase_device.
+ * @pf_wq:             Workqueue for processing work items related to Bus fault
+ *                     and Page fault handling.
+ * @work_pagefault:    Work item for the Page fault handling.
+ * @work_busfault:     Work item for the Bus fault handling.
+ * @fault_type:        Type of fault which occurred for this address space,
+ *                     regular/unexpected Bus or Page fault.
+ * @protected_mode:    Flag indicating whether the fault occurred in protected
+ *                     mode or not.
+ * @fault_status:      Records the fault status as reported by Hw.
+ * @fault_addr:        Records the faulting address.
+ * @fault_extra_addr:  Records the secondary fault address.
+ * @current_setup:     Stores the MMU configuration for this address space.
+ * @poke_wq:           Workqueue to process the work items queued for poking the
+ *                     MMU as a WA for BASE_HW_ISSUE_8316.
+ * @poke_work:         Work item to do the poking of MMU for this address space.
+ * @poke_refcount:     Refcount for the need of poking the MMU. While the refcount
+ *                     is non-zero the poking of the MMU will continue.
+ *                     Protected by hwaccess_lock.
+ * @poke_state:        State indicating whether poking is in progress or it has
+ *                     been stopped. Protected by hwaccess_lock.
+ * @poke_timer:        Timer used to schedule the poking at regular intervals.
  */
 struct kbase_as {
 	int number;
-
 	struct workqueue_struct *pf_wq;
 	struct work_struct work_pagefault;
 	struct work_struct work_busfault;
@@ -692,19 +893,36 @@
 	u32 fault_status;
 	u64 fault_addr;
 	u64 fault_extra_addr;
-
 	struct kbase_mmu_setup current_setup;
-
-	/* BASE_HW_ISSUE_8316  */
 	struct workqueue_struct *poke_wq;
 	struct work_struct poke_work;
-	/** Protected by hwaccess_lock */
 	int poke_refcount;
-	/** Protected by hwaccess_lock */
 	kbase_as_poke_state poke_state;
 	struct hrtimer poke_timer;
 };
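The comment removed above noted that a struct kbase_as always lives at kbase_device->as[number]; a sketch of the kind of recovery that layout permits (illustrative only, helper name hypothetical):

/* Illustrative only: recover the owning device from an address space,
 * relying on kbase_as always being embedded at kbase_device->as[number].
 */
static struct kbase_device *example_as_to_kbdev(struct kbase_as *as)
{
	struct kbase_as *as0 = as - as->number;	/* &kbdev->as[0] */

	return container_of(as0, struct kbase_device, as[0]);
}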
 
+/**
+ * struct kbase_mmu_table  - object representing a set of GPU page tables
+ * @mmu_teardown_pages:   Buffer of 4 Pages in size, used to cache the entries
+ *                        of top & intermediate level page tables to avoid
+ *                        repeated calls to kmap_atomic during the MMU teardown.
+ * @mmu_lock:             Lock to serialize the accesses made to multi-level GPU
+ *                        page tables.
+ * @pgd:                  Physical address of the page allocated for the top
+ *                        level page table of the context, this is used for
+ *                        MMU HW programming as the address translation will
+ *                        start from the top level page table.
+ * @kctx:                 If this set of MMU tables belongs to a context then
+ *                        this is a back-reference to the context, otherwise
+ *                        it is NULL
+ */
+struct kbase_mmu_table {
+	u64 *mmu_teardown_pages;
+	struct mutex mmu_lock;
+	phys_addr_t pgd;
+	struct kbase_context *kctx;
+};
+
 static inline int kbase_as_has_bus_fault(struct kbase_as *as)
 {
 	return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
@@ -738,6 +956,37 @@
 #define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
 #define KBASE_TRACE_FLAG_JOBSLOT  (((u8)1) << 1)
 
+/**
+ * struct kbase_trace - object representing a trace message added to trace buffer
+ *                      kbase_device::trace_rbuf
+ * @timestamp:          CPU timestamp at which the trace message was added.
+ * @thread_id:          id of the thread in whose context the trace message
+ *                      was added.
+ * @cpu:                indicates which CPU the @thread_id was scheduled on when
+ *                      the trace message was added.
+ * @ctx:                Pointer to the kbase context for which the trace message
+ *                      was added. Will be NULL for certain trace messages, such
+ *                      as those added for power management events.
+ *                      Will point to the appropriate context corresponding to
+ *                      job-slot & context's reference count related events.
+ * @katom:              indicates if the trace message has atom related info.
+ * @atom_number:        id of the atom for which the trace message was added.
+ *                      Only valid if @katom is true.
+ * @atom_udata:         Copy of the user data sent for the atom in base_jd_submit.
+ *                      Only valid if @katom is true.
+ * @gpu_addr:           GPU address of the job-chain represented by atom. Could
+ *                      be valid even if @katom is false.
+ * @info_val:           value specific to the type of event being traced. For the
+ *                      case where @katom is true, will be set to atom's affinity,
+ *                      i.e. bitmask of shader cores chosen for atom's execution.
+ * @code:               Identifies the event, refer enum kbase_trace_code.
+ * @jobslot:            job-slot for which trace message was added, valid only for
+ *                      job-slot management events.
+ * @refcount:           reference count for the context, valid for certain events
+ *                      related to scheduler core and policy.
+ * @flags:              indicates if info related to @jobslot & @refcount is present
+ *                      in the trace message, used during dumping of the message.
+ */
 struct kbase_trace {
 	struct timespec timestamp;
 	u32 thread_id;
@@ -754,88 +1003,6 @@
 	u8 flags;
 };
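As the @flags description above says, @jobslot and @refcount are only meaningful when the corresponding KBASE_TRACE_FLAG_* bit is set. A dump-time sketch (illustrative only, not the driver's dump routine):

static void example_dump_trace(const struct kbase_trace *msg)
{
	/* Only print the optional fields that the writer marked as valid. */
	if (msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
		pr_info("jobslot=%u\n", msg->jobslot);
	if (msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
		pr_info("refcount=%u\n", msg->refcount);
}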
 
-/** Event IDs for the power management framework.
- *
- * Any of these events might be missed, so they should not be relied upon to
- * find the precise state of the GPU at a particular time in the
- * trace. Overall, we should get a high percentage of these events for
- * statisical purposes, and so a few missing should not be a problem */
-enum kbase_timeline_pm_event {
-	/* helper for tests */
-	KBASEP_TIMELINE_PM_EVENT_FIRST,
-
-	/** Event reserved for backwards compatibility with 'init' events */
-	KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
-
-	/** The power state of the device has changed.
-	 *
-	 * Specifically, the device has reached a desired or available state.
-	 */
-	KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
-
-	/** The GPU is becoming active.
-	 *
-	 * This event is sent when the first context is about to use the GPU.
-	 */
-	KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,
-
-	/** The GPU is becoming idle.
-	 *
-	 * This event is sent when the last context has finished using the GPU.
-	 */
-	KBASE_TIMELINE_PM_EVENT_GPU_IDLE,
-
-	/** Event reserved for backwards compatibility with 'policy_change'
-	 * events */
-	KBASE_TIMELINE_PM_EVENT_RESERVED_4,
-
-	/** Event reserved for backwards compatibility with 'system_suspend'
-	 * events */
-	KBASE_TIMELINE_PM_EVENT_RESERVED_5,
-
-	/** Event reserved for backwards compatibility with 'system_resume'
-	 * events */
-	KBASE_TIMELINE_PM_EVENT_RESERVED_6,
-
-	/** The job scheduler is requesting to power up/down cores.
-	 *
-	 * This event is sent when:
-	 * - powered down cores are needed to complete a job
-	 * - powered up cores are not needed anymore
-	 */
-	KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
-
-	KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
-};
-
-#ifdef CONFIG_MALI_TRACE_TIMELINE
-struct kbase_trace_kctx_timeline {
-	atomic_t jd_atoms_in_flight;
-	u32 owner_tgid;
-};
-
-struct kbase_trace_kbdev_timeline {
-	/* Note: strictly speaking, not needed, because it's in sync with
-	 * kbase_device::jm_slots[]::submitted_nr
-	 *
-	 * But it's kept as an example of how to add global timeline tracking
-	 * information
-	 *
-	 * The caller must hold hwaccess_lock when accessing this */
-	u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
-
-	/* Last UID for each PM event */
-	atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
-	/* Counter for generating PM event UIDs */
-	atomic_t pm_event_uid_counter;
-	/*
-	 * L2 transition state - true indicates that the transition is ongoing
-	 * Expected to be protected by hwaccess_lock */
-	bool l2_transitioning;
-};
-#endif /* CONFIG_MALI_TRACE_TIMELINE */
-
-
 struct kbasep_kctx_list_element {
 	struct list_head link;
 	struct kbase_context *kctx;
@@ -857,7 +1024,11 @@
 	 */
 	struct mutex lock;
 
-	/** The reference count of active contexts on this device. */
+	/**
+	 * The reference count of active contexts on this device. Note that
+	 * some code paths keep shaders/the tiler powered whilst this is 0. Use
+	 * kbase_pm_is_active() instead to check for such cases.
+	 */
 	int active_count;
 	/** Flag indicating suspending/suspended */
 	bool suspending;
@@ -904,19 +1075,23 @@
 
 /**
  * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
- * @kbdev:     Kbase device where memory is used
- * @cur_size:  Number of free pages currently in the pool (may exceed @max_size
- *             in some corner cases)
- * @max_size:  Maximum number of free pages in the pool
- * @order:     order = 0 refers to a pool of 4 KB pages
- *             order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB)
- * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size
- *             and @page_list
- * @page_list: List of free pages in the pool
- * @reclaim:   Shrinker for kernel reclaim of free pages
- * @next_pool: Pointer to next pool where pages can be allocated when this pool
- *             is empty. Pages will spill over to the next pool when this pool
- *             is full. Can be NULL if there is no next pool.
+ * @kbdev:        Kbase device where memory is used
+ * @cur_size:     Number of free pages currently in the pool (may exceed
+ *                @max_size in some corner cases)
+ * @max_size:     Maximum number of free pages in the pool
+ * @order:        order = 0 refers to a pool of 4 KB pages
+ *                order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB)
+ * @pool_lock:    Lock protecting the pool - must be held when modifying
+ *                @cur_size and @page_list
+ * @page_list:    List of free pages in the pool
+ * @reclaim:      Shrinker for kernel reclaim of free pages
+ * @next_pool:    Pointer to next pool where pages can be allocated when this
+ *                pool is empty. Pages will spill over to the next pool when
+ *                this pool is full. Can be NULL if there is no next pool.
+ * @dying:        true if the pool is being terminated, and any ongoing
+ *                operations should be abandoned
+ * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from
+ *                this pool, eg during a grow operation
  */
 struct kbase_mem_pool {
 	struct kbase_device *kbdev;
@@ -928,6 +1103,9 @@
 	struct shrinker     reclaim;
 
 	struct kbase_mem_pool *next_pool;
+
+	bool dying;
+	bool dont_reclaim;
 };
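The two new fields, @dying and @dont_reclaim, exist to gate the shrinker. A hedged sketch of how a shrinker count callback might honour them (illustrative only, not the pool's real callback):

/* Illustrative only: report nothing reclaimable while the pool is being
 * torn down (@dying) or is temporarily protected, e.g. during a grow
 * operation (@dont_reclaim).
 */
static unsigned long example_pool_shrink_count(struct kbase_mem_pool *pool)
{
	if (pool->dying || pool->dont_reclaim)
		return 0;

	return pool->cur_size;
}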
 
 /**
@@ -943,9 +1121,33 @@
 	u64 core_mask;
 };
 
+/* MMU mode flags */
+#define KBASE_MMU_MODE_HAS_NON_CACHEABLE (1ul << 0) /* Has NON_CACHEABLE MEMATTR */
+
+/**
+ * struct kbase_mmu_mode - object containing pointer to methods invoked for
+ *                         programming the MMU, as per the MMU mode supported
+ *                         by Hw.
+ * @update:           enable & setup/configure one of the GPU address spaces.
+ * @get_as_setup:     retrieve the configuration of one of the GPU address spaces.
+ * @disable_as:       disable one of the GPU address spaces.
+ * @pte_to_phy_addr:  retrieve the physical address encoded in the page table entry.
+ * @ate_is_valid:     check if the pte is a valid address translation entry
+ *                    encoding the physical address of the actual mapped page.
+ * @pte_is_valid:     check if the pte is a valid entry encoding the physical
+ *                    address of the next lower level page table.
+ * @entry_set_ate:    program the pte to be a valid address translation entry to
+ *                    encode the physical address of the actual page being mapped.
+ * @entry_set_pte:    program the pte to be a valid entry to encode the physical
+ *                    address of the next lower level page table.
+ * @entry_invalidate: clear out or invalidate the pte.
+ * @flags:            bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants.
+ */
 struct kbase_mmu_mode {
-	void (*update)(struct kbase_context *kctx);
-	void (*get_as_setup)(struct kbase_context *kctx,
+	void (*update)(struct kbase_device *kbdev,
+			struct kbase_mmu_table *mmut,
+			int as_nr);
+	void (*get_as_setup)(struct kbase_mmu_table *mmut,
 			struct kbase_mmu_setup * const setup);
 	void (*disable_as)(struct kbase_device *kbdev, int as_nr);
 	phys_addr_t (*pte_to_phy_addr)(u64 entry);
@@ -955,6 +1157,7 @@
 			unsigned long flags, unsigned int level);
 	void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
 	void (*entry_invalidate)(u64 *entry);
+	unsigned long flags;
 };
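Callers reach the MMU through this ops table; a sketch using the reworked get_as_setup() signature and the new @flags bitmask (illustrative only, function name hypothetical):

static void example_mmu_get_setup(struct kbase_device *kbdev,
				  struct kbase_mmu_table *mmut,
				  struct kbase_mmu_setup *setup)
{
	const struct kbase_mmu_mode *mode = kbdev->mmu_mode;

	/* The new @flags field advertises per-mode capabilities. */
	if (!(mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE))
		pr_debug("MMU mode has no NON_CACHEABLE MEMATTR entry\n");

	/* get_as_setup() now takes the page-table object, not a kbase_context. */
	mode->get_as_setup(mmut, setup);
}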
 
 struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
@@ -963,9 +1166,250 @@
 
 #define DEVNAME_SIZE	16
 
-struct kbase_device {
-	s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS];
 
+/**
+ * struct kbase_device   - Object representing an instance of GPU platform device,
+ *                         allocated from the probe method of mali driver.
+ * @hw_quirks_sc:          Configuration to be used for the shader cores as per
+ *                         the HW issues present in the GPU.
+ * @hw_quirks_tiler:       Configuration to be used for the Tiler as per the HW
+ *                         issues present in the GPU.
+ * @hw_quirks_mmu:         Configuration to be used for the MMU as per the HW
+ *                         issues present in the GPU.
+ * @hw_quirks_jm:          Configuration to be used for the Job Manager as per
+ *                         the HW issues present in the GPU.
+ * @entry:                 Links the device instance to the global list of GPU
+ *                         devices. The list would have as many entries as there
+ *                         are GPU device instances.
+ * @dev:                   Pointer to the kernel's generic/base representation
+ *                         of the GPU platform device.
+ * @mdev:                  Pointer to the miscellaneous device registered to
+ *                         provide Userspace access to kernel driver through the
+ *                         device file /dev/malixx.
+ * @reg_start:             Base address of the region in physical address space
+ *                         where GPU registers have been mapped.
+ * @reg_size:              Size of the region containing GPU registers
+ * @reg:                   Kernel virtual address of the region containing GPU
+ *                         registers, through which the Driver accesses the registers.
+ * @irqs:                  Array containing IRQ resource info for 3 types of
+ *                         interrupts: Job scheduling, MMU & GPU events (like
+ *                         power management, cache, etc.)
+ * @clock:                 Pointer to the input clock resource (having an id of 0),
+ *                         referenced by the GPU device node.
+ * @regulator:             Pointer to the struct corresponding to the regulator
+ *                         for GPU device
+ * @devname:               string containing the name used for the GPU device instance;
+ *                         the miscellaneous device is registered using the same name.
+ * @model:                 Pointer to the dummy model, valid only when the Driver is
+ *                         compiled to not access the real GPU Hw. The model tries to
+ *                         mimic, to some extent, the state & behavior of the GPU Hw
+ *                         in response to the register accesses made by the Driver.
+ * @irq_slab:              slab cache for allocating the work items queued when
+ *                         model mimics raising of IRQ to cause an interrupt on CPU.
+ * @irq_workq:             workqueue for processing the irq work items.
+ * @serving_job_irq:       function to execute work items queued when model mimics
+ *                         the raising of JS irq, mimics the interrupt handler
+ *                         processing JS interrupts.
+ * @serving_gpu_irq:       function to execute work items queued when model mimics
+ *                         the raising of GPU irq, mimics the interrupt handler
+ *                         processing GPU interrupts.
+ * @serving_mmu_irq:       function to execute work items queued when model mimics
+ *                         the raising of MMU irq, mimics the interrupt handler
+ *                         processing MMU interrupts.
+ * @reg_op_lock:           lock used by model to serialize the handling of register
+ *                         accesses made by the driver.
+ * @pm:                    Per device object for storing data for power management
+ *                         framework.
+ * @js_data:               Per device object encapsulating the current context of
+ *                         Job Scheduler, which is global to the device and is not
+ *                         tied to any particular struct kbase_context running on
+ *                         the device
+ * @mem_pool:              Object containing the state for global pool of 4KB size
+ *                         physical pages which can be used by all the contexts.
+ * @lp_mem_pool:           Object containing the state for global pool of 2MB size
+ *                         physical pages which can be used by all the contexts.
+ * @memdev:                keeps track of the in use physical pages allocated by
+ *                         the Driver.
+ * @mmu_mode:              Pointer to the object containing methods for programming
+ *                         the MMU, depending on the type of MMU supported by Hw.
+ * @as:                    Array of objects representing address spaces of GPU.
+ * @as_free:               Bitpattern of free/available GPU address spaces.
+ * @as_to_kctx:            Array of pointers to struct kbase_context, having
+ *                         GPU address spaces assigned to them.
+ * @mmu_mask_change:       Lock to serialize the access to MMU interrupt mask
+ *                         register used in the handling of Bus & Page faults.
+ * @gpu_props:             Object containing complete information about the
+ *                         configuration/properties of GPU HW device in use.
+ * @hw_issues_mask:        List of SW workarounds for HW issues
+ * @hw_features_mask:      List of available HW features.
+ * @shader_needed_cnt:     Count for the 64 shader cores, incremented when
+ *                         shaders are requested for use and decremented later
+ *                         when they are no longer required.
+ * @tiler_needed_cnt:      Count for the Tiler block, incremented when the Tiler
+ *                         is requested for use and decremented later when the
+ *                         Tiler is no longer required.
+ * @disjoint_event:        struct for keeping track of the disjoint information,
+ *                         that whether the GPU is in a disjoint state and the
+ *                         number of disjoint events that have occurred on GPU.
+ * @l2_users_count:        Refcount for tracking users of the l2 cache, e.g.
+ *                         when using hardware counter instrumentation.
+ * @shader_available_bitmap: Bitmap of shader cores that are currently available,
+ *                         powered up and the power policy is happy for jobs
+ *                         to be submitted to these cores. These are updated
+ *                         by the power management code. The job scheduler
+ *                         should avoid submitting new jobs to any cores
+ *                         that are not marked as available.
+ * @tiler_available_bitmap: Bitmap of tiler units that are currently available.
+ * @l2_available_bitmap:    Bitmap of the currently available Level 2 caches.
+ * @stack_available_bitmap: Bitmap of the currently available Core stacks.
+ * @shader_ready_bitmap:    Bitmap of shader cores that are ready (powered on)
+ * @shader_transitioning_bitmap: Bitmap of shader cores that are currently changing
+ *                         power state.
+ * @nr_hw_address_spaces:  Number of address spaces actually available in the
+ *                         GPU, remains constant after driver initialisation.
+ * @nr_user_address_spaces: Number of address spaces available to user contexts
+ * @hwcnt:                  Structure used for instrumentation and HW counters
+ *                         dumping
+ * @vinstr_ctx:            vinstr context created per device
+ * @trace_lock:            Lock to serialize the access to trace buffer.
+ * @trace_first_out:       Index/offset in the trace buffer at which the first
+ *                         unread message is present.
+ * @trace_next_in:         Index/offset in the trace buffer at which the new
+ *                         message will be written.
+ * @trace_rbuf:            Pointer to the buffer storing debug messages/prints
+ *                         tracing the various events in Driver.
+ *                         The buffer is filled in circular fashion.
+ * @reset_timeout_ms:      Number of milliseconds to wait for the soft stop to
+ *                         complete for the GPU jobs before proceeding with the
+ *                         GPU reset.
+ * @cacheclean_lock:       Lock to serialize the clean & invalidation of GPU caches,
+ *                         between Job Manager backend & Instrumentation code.
+ * @platform_context:      Platform specific private data to be accessed by
+ *                         platform specific config files only.
+ * @kctx_list:             List of kbase_contexts created for the device, including
+ *                         the kbase_context created for vinstr_ctx.
+ * @kctx_list_lock:        Lock protecting concurrent accesses to @kctx_list.
+ * @devfreq_profile:       Describes devfreq profile for the Mali GPU device, passed
+ *                         to devfreq_add_device() to add devfreq feature to Mali
+ *                         GPU device.
+ * @devfreq:               Pointer to devfreq structure for Mali GPU device,
+ *                         returned on the call to devfreq_add_device().
+ * @current_freq:          The real frequency, corresponding to @current_nominal_freq,
+ *                         at which the Mali GPU device is currently operating, as
+ *                         retrieved from @opp_table in the target callback of
+ *                         @devfreq_profile.
+ * @current_nominal_freq:  The nominal frequency currently used for the Mali GPU
+ *                         device as retrieved through devfreq_recommended_opp()
+ *                         using the freq value passed as an argument to target
+ *                         callback of @devfreq_profile
+ * @current_voltage:       The voltage corresponding to @current_nominal_freq, as
+ *                         retrieved through dev_pm_opp_get_voltage().
+ * @current_core_mask:     bitmask of shader cores that are currently desired &
+ *                         enabled, corresponding to @current_nominal_freq as
+ *                         retrieved from @opp_table in the target callback of
+ *                         @devfreq_profile.
+ * @opp_table:             Pointer to the lookup table for converting between nominal
+ *                         OPP (operating performance point) frequency, and real
+ *                         frequency and core mask. This table is constructed according
+ *                         to operating-points-v2-mali table in devicetree.
+ * @num_opps:              Number of operating performance points available for the Mali
+ *                         GPU device.
+ * @devfreq_cooling:       Pointer returned on registering devfreq cooling device
+ *                         corresponding to @devfreq.
+ * @ipa_protection_mode_switched: is set to TRUE when GPU is put into protected
+ *                         mode. It is a sticky flag which is cleared by IPA
+ *                         once it has made use of information that GPU had
+ *                         previously entered protected mode.
+ * @ipa:                   Top level structure for IPA, containing pointers to both
+ *                         configured & fallback models.
+ * @timeline:              Stores the global timeline tracking information.
+ * @job_fault_debug:       Flag to control the dumping of debug data for job faults,
+ *                         set when the 'job_fault' debugfs file is opened.
+ * @mali_debugfs_directory: Root directory for the debugfs files created by the driver
+ * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing
+ *                         a sub-directory for every context.
+ * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault
+ *                         has occurred.
+ * @job_fault_wq:          Waitqueue to block the job fault dumping daemon till the
+ *                         occurrence of a job fault.
+ * @job_fault_resume_wq:   Waitqueue on which every context with a faulty job waits
+ *                         for the job fault dumping to complete before it can do
+ *                         the bottom half of job done for the atoms which followed
+ *                         the faulty atom.
+ * @job_fault_resume_workq: workqueue to process the work items queued for the faulty
+ *                         atoms, whereby the work item function waits for the dumping
+ *                         to get completed.
+ * @job_fault_event_list:  List of atoms, each belonging to a different context, which
+ *                         generated a job fault.
+ * @job_fault_event_lock:  Lock to protect concurrent accesses to @job_fault_event_list
+ * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs
+ *                         file "read_register".
+ * @kbase_profiling_controls: Profiling controls set by gator to control frame buffer
+ *                         dumping and s/w counter reporting.
+ * @force_replay_limit:    Number of gpu jobs, having replay atoms associated with them,
+ *                         that are run before a job is forced to fail and replay.
+ *                         Set to 0 to disable forced failures.
+ * @force_replay_count:    Count of gpu jobs, having replay atoms associated with them,
+ *                         between forced failures. Incremented on each gpu job which
+ *                         has replay atoms dependent on it. A gpu job is forced to
+ *                         fail once this is greater than or equal to @force_replay_limit
+ * @force_replay_core_req: Core requirements, set through the sysfs file, for the replay
+ *                         job atoms to consider the associated gpu job for forceful
+ *                         failure and replay. May be zero.
+ * @force_replay_random:   Set to 1 to randomize the @force_replay_limit, in the
+ *                         range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
+ * @ctx_num:               Total number of contexts created for the device.
+ * @io_history:            Pointer to an object keeping a track of all recent
+ *                         register accesses. The history of register accesses
+ *                         can be read through "regs_history" debugfs file.
+ * @hwaccess:              Contains a pointer to active kbase context and GPU
+ *                         backend specific data for HW access layer.
+ * @faults_pending:        Count of page/bus faults waiting for bottom half processing
+ *                         via workqueues.
+ * @poweroff_pending:      Set when power off operation for GPU is started, reset when
+ *                         power on for GPU is started.
+ * @infinite_cache_active_default: Set to enable using infinite cache for all the
+ *                         allocations of a new context.
+ * @mem_pool_max_size_default: Initial/default value for the maximum size of both
+ *                         types of pool created for a new context.
+ * @current_gpu_coherency_mode: coherency mode in use, which can be different
+ *                         from @system_coherency, when using protected mode.
+ * @system_coherency:      coherency mode as retrieved from the device tree.
+ * @cci_snoop_enabled:     Flag to track when CCI snoops have been enabled.
+ * @snoop_enable_smc:      SMC function ID to call into Trusted firmware to
+ *                         enable cache snooping. Value of 0 indicates that it
+ *                         is not used.
+ * @snoop_disable_smc:     SMC function ID to call to disable cache snooping.
+ * @protected_ops:         Pointer to the methods for switching in or out of the
+ *                         protected mode, as per the @protected_dev being used.
+ * @protected_dev:         Pointer to the protected mode switcher device attached
+ *                         to the GPU device, retrieved through the device tree if
+ *                         the GPU does not support protected mode switching natively.
+ * @protected_mode:        set to TRUE when GPU is put into protected mode
+ * @protected_mode_transition: set to TRUE when GPU is transitioning into or
+ *                         out of protected mode.
+ * @protected_mode_support: set to true if protected mode is supported.
+ * @buslogger:              Pointer to the structure required for interfacing
+ *                          with the bus logger module to set the size of buffer
+ *                          used by the module for capturing bus logs.
+ * @irq_reset_flush:        Flag to indicate that GPU reset is in-flight and flush of
+ *                          IRQ + bottom half is being done, to prevent the writes
+ *                          to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers.
+ * @inited_subsys:          Bitmap of inited sub systems at the time of device probe.
+ *                          Used during device remove or for handling error in probe.
+ * @hwaccess_lock:          Lock, which can be taken from IRQ context, to serialize
+ *                          the updates made to Job dispatcher + scheduler states.
+ * @mmu_hw_mutex:           Protects access to MMU operations and address space
+ *                          related state.
+ * @serialize_jobs:         Currently used mode for serialization of jobs, both
+ *                          intra & inter slots serialization is supported.
+ * @backup_serialize_jobs:  Copy of the original value of @serialize_jobs taken
+ *                          when GWT is enabled. Used to restore the original value
+ *                          on disabling of GWT.
+ * @js_ctx_scheduling_mode: Context scheduling mode currently being used by
+ *                          Job Scheduler
+ */
+struct kbase_device {
 	u32 hw_quirks_sc;
 	u32 hw_quirks_tiler;
 	u32 hw_quirks_mmu;
@@ -1007,66 +1451,26 @@
 	struct kbase_mmu_mode const *mmu_mode;
 
 	struct kbase_as as[BASE_MAX_NR_AS];
-	/* The below variables (as_free and as_to_kctx) are managed by the
-	 * Context Scheduler. The kbasep_js_device_data::runpool_irq::lock must
-	 * be held whilst accessing these.
-	 */
 	u16 as_free; /* Bitpattern of free Address Spaces */
-	/* Mapping from active Address Spaces to kbase_context */
 	struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
 
-
 	spinlock_t mmu_mask_change;
 
 	struct kbase_gpu_props gpu_props;
 
-	/** List of SW workarounds for HW issues */
 	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
-	/** List of features available */
 	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
 
-	/* Bitmaps of cores that are currently in use (running jobs).
-	 * These should be kept up to date by the job scheduler.
-	 *
-	 * pm.power_change_lock should be held when accessing these members.
-	 *
-	 * kbase_pm_check_transitions_nolock() should be called when bits are
-	 * cleared to update the power management system and allow transitions to
-	 * occur. */
-	u64 shader_inuse_bitmap;
-
-	/* Refcount for cores in use */
-	u32 shader_inuse_cnt[64];
-
-	/* Bitmaps of cores the JS needs for jobs ready to run */
-	u64 shader_needed_bitmap;
-
-	/* Refcount for cores needed */
-	u32 shader_needed_cnt[64];
-
-	u32 tiler_inuse_cnt;
-
 	u32 tiler_needed_cnt;
+	u32 shader_needed_cnt;
 
-	/* struct for keeping track of the disjoint information
-	 *
-	 * The state  is > 0 if the GPU is in a disjoint state. Otherwise 0
-	 * The count is the number of disjoint events that have occurred on the GPU
-	 */
 	struct {
 		atomic_t count;
 		atomic_t state;
 	} disjoint_event;
 
-	/* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */
 	u32 l2_users_count;
 
-	/* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be
-	 * submitted to these cores. These are updated by the power management code. The job scheduler should avoid
-	 * submitting new jobs to any cores that are not marked as available.
-	 *
-	 * pm.power_change_lock should be held when accessing these members.
-	 */
 	u64 shader_available_bitmap;
 	u64 tiler_available_bitmap;
 	u64 l2_available_bitmap;
@@ -1075,10 +1479,9 @@
 	u64 shader_ready_bitmap;
 	u64 shader_transitioning_bitmap;
 
-	s8 nr_hw_address_spaces;			  /**< Number of address spaces in the GPU (constant after driver initialisation) */
-	s8 nr_user_address_spaces;			  /**< Number of address spaces available to user contexts */
+	s8 nr_hw_address_spaces;
+	s8 nr_user_address_spaces;
 
-	/* Structure used for instrumentation and HW counters dumping */
 	struct kbase_hwcnt {
 		/* The lock should be used when accessing any of the following members */
 		spinlock_t lock;
@@ -1102,10 +1505,8 @@
 
 	struct mutex cacheclean_lock;
 
-	/* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */
 	void *platform_context;
 
-	/* List of kbase_contexts created */
 	struct list_head        kctx_list;
 	struct mutex            kctx_list_lock;
 
@@ -1118,101 +1519,95 @@
 	u64 current_core_mask;
 	struct kbase_devfreq_opp *opp_table;
 	int num_opps;
+	struct kbasep_pm_metrics last_devfreq_metrics;
 #ifdef CONFIG_DEVFREQ_THERMAL
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
 	struct devfreq_cooling_device *devfreq_cooling;
 #else
 	struct thermal_cooling_device *devfreq_cooling;
 #endif
-	/* Current IPA model - true for configured model, false for fallback */
-	atomic_t ipa_use_configured_model;
+	bool ipa_protection_mode_switched;
 	struct {
 		/* Access to this struct must be with ipa.lock held */
 		struct mutex lock;
 		struct kbase_ipa_model *configured_model;
 		struct kbase_ipa_model *fallback_model;
+
+		/* Values of the PM utilization metrics from last time the
+		 * power model was invoked. The utilization is calculated as
+		 * the difference between last_metrics and the current values.
+		 */
+		struct kbasep_pm_metrics last_metrics;
+
+		/*
+		 * gpu_active_callback - Inform IPA that GPU is now active
+		 * @model_data: Pointer to model data
+		 */
+		void (*gpu_active_callback)(
+				struct kbase_ipa_model_vinstr_data *model_data);
+
+		/*
+		 * gpu_idle_callback - Inform IPA that GPU is now idle
+		 * @model_data: Pointer to model data
+		 */
+		void (*gpu_idle_callback)(
+				struct kbase_ipa_model_vinstr_data *model_data);
+
+		/* Model data to pass to ipa_gpu_active/idle() */
+		struct kbase_ipa_model_vinstr_data *model_data;
+
+		/* true if IPA is currently using vinstr */
+		bool vinstr_active;
 	} ipa;
 #endif /* CONFIG_DEVFREQ_THERMAL */
 #endif /* CONFIG_MALI_DEVFREQ */
 
-
-#ifdef CONFIG_MALI_TRACE_TIMELINE
-	struct kbase_trace_kbdev_timeline timeline;
-#endif
-
-	/*
-	 * Control for enabling job dump on failure, set when control debugfs
-	 * is opened.
-	 */
 	bool job_fault_debug;
 
 #ifdef CONFIG_DEBUG_FS
-	/* directory for debugfs entries */
 	struct dentry *mali_debugfs_directory;
-	/* Root directory for per context entry */
 	struct dentry *debugfs_ctx_directory;
 
 #ifdef CONFIG_MALI_DEBUG
-	/* bit for each as, set if there is new data to report */
 	u64 debugfs_as_read_bitmap;
 #endif /* CONFIG_MALI_DEBUG */
 
-	/* failed job dump, used for separate debug process */
 	wait_queue_head_t job_fault_wq;
 	wait_queue_head_t job_fault_resume_wq;
 	struct workqueue_struct *job_fault_resume_workq;
 	struct list_head job_fault_event_list;
 	spinlock_t job_fault_event_lock;
-	struct kbase_context *kctx_fault;
 
 #if !MALI_CUSTOMER_RELEASE
-	/* Per-device data for register dumping interface */
 	struct {
-		u16 reg_offset; /* Offset of a GPU_CONTROL register to be
-				   dumped upon request */
+		u16 reg_offset;
 	} regs_dump_debugfs_data;
 #endif /* !MALI_CUSTOMER_RELEASE */
 #endif /* CONFIG_DEBUG_FS */
 
-	/* fbdump profiling controls set by gator */
 	u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
 
 
 #if MALI_CUSTOMER_RELEASE == 0
-	/* Number of jobs that are run before a job is forced to fail and
-	 * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced
-	 * failures. */
 	int force_replay_limit;
-	/* Count of jobs between forced failures. Incremented on each job. A
-	 * job is forced to fail once this is greater than or equal to
-	 * force_replay_limit. */
 	int force_replay_count;
-	/* Core requirement for jobs to be failed and replayed. May be zero. */
 	base_jd_core_req force_replay_core_req;
-	/* true if force_replay_limit should be randomized. The random
-	 * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
-	 */
 	bool force_replay_random;
 #endif
 
-	/* Total number of created contexts */
 	atomic_t ctx_num;
 
 #ifdef CONFIG_DEBUG_FS
-	/* Holds the most recent register accesses */
 	struct kbase_io_history io_history;
 #endif /* CONFIG_DEBUG_FS */
 
 	struct kbase_hwaccess_data hwaccess;
 
-	/* Count of page/bus faults waiting for workqueues to process */
 	atomic_t faults_pending;
 
-	/* true if GPU is powered off or power off operation is in progress */
 	bool poweroff_pending;
 
 
-	/* defaults for new context created for this device */
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
 	bool infinite_cache_active_default;
 #else
@@ -1220,73 +1615,46 @@
 #endif
 	size_t mem_pool_max_size_default;
 
-	/* current gpu coherency mode */
 	u32 current_gpu_coherency_mode;
-	/* system coherency mode  */
 	u32 system_coherency;
-	/* Flag to track when cci snoops have been enabled on the interface */
+
 	bool cci_snoop_enabled;
 
-	/* SMC function IDs to call into Trusted firmware to enable/disable
-	 * cache snooping. Value of 0 indicates that they are not used
-	 */
 	u32 snoop_enable_smc;
 	u32 snoop_disable_smc;
 
-	/* Protected mode operations */
 	struct protected_mode_ops *protected_ops;
 
-	/* Protected device attached to this kbase device */
 	struct protected_mode_device *protected_dev;
 
-	/*
-	 * true when GPU is put into protected mode
-	 */
 	bool protected_mode;
 
-	/*
-	 * true when GPU is transitioning into or out of protected mode
-	 */
 	bool protected_mode_transition;
 
-	/*
-	 * true if protected mode is supported
-	 */
 	bool protected_mode_support;
 
-
-#ifdef CONFIG_MALI_DEBUG
-	wait_queue_head_t driver_inactive_wait;
-	bool driver_inactive;
-#endif /* CONFIG_MALI_DEBUG */
-
 #ifdef CONFIG_MALI_FPGA_BUS_LOGGER
-	/*
-	 * Bus logger integration.
-	 */
 	struct bus_logger_client *buslogger;
 #endif
-	/* Boolean indicating if an IRQ flush during reset is in progress. */
+
 	bool irq_reset_flush;
 
-	/* list of inited sub systems. Used during terminate/error recovery */
 	u32 inited_subsys;
 
 	spinlock_t hwaccess_lock;
 
-	/* Protects access to MMU operations */
 	struct mutex mmu_hw_mutex;
 
-	/* Current serialization mode. See KBASE_SERIALIZE_* for details */
+	/* See KBASE_SERIALIZE_* for details */
 	u8 serialize_jobs;
 
 #ifdef CONFIG_MALI_JOB_DUMP
-	/* Used to backup status of job serialization mode
-	 * when we use GWT and restore when GWT is disabled.
-	 * GWT uses full serialization mode.
-	 */
 	u8 backup_serialize_jobs;
 #endif
+
+	/* See KBASE_JS_*_PRIORITY_MODE for details. */
+	u32 js_ctx_scheduling_mode;
+
 };
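Per the kernel-doc above, @hw_issues_mask and @hw_features_mask are plain unsigned-long bitmaps indexed by the BASE_HW_ISSUE_* / BASE_HW_FEATURE_* enums, so the generic bitops apply. A sketch (illustrative only; the driver's own accessors are not shown here):

#include <linux/bitops.h>	/* test_bit */

static bool example_has_hw_issue(struct kbase_device *kbdev,
				 unsigned int issue)
{
	return test_bit(issue, kbdev->hw_issues_mask);
}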
 
 /**
@@ -1351,6 +1719,18 @@
  * allocation mechanism. However, the 64-bit user-space client must still
  * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT
  *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled
+ * from it for job slot 0. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled
+ * from it for job slot 1. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled
+ * from it for job slot 2. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
  * All members need to be separate bits. This enum is intended for use in a
  * bitmask where multiple values get OR-ed together.
  */
@@ -1367,6 +1747,9 @@
 	KCTX_DYING = 1U << 9,
 	KCTX_NO_IMPLICIT_SYNC = 1U << 10,
 	KCTX_FORCE_SAME_VA = 1U << 11,
+	KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12,
+	KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
+	KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
 };
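The three new KCTX_PULLED_SINCE_ACTIVE_JSn flags occupy consecutive bits, so the flag for a given job slot can be computed by shifting. A sketch (illustrative only, and it ignores the driver's own accessors for the context flags field):

/* Illustrative only: valid for job slots 0..2, matching the three
 * KCTX_PULLED_SINCE_ACTIVE_JSn bits defined above.
 */
static bool example_pulled_since_active(u32 ctx_flags, int js)
{
	return ctx_flags & ((u32)KCTX_PULLED_SINCE_ACTIVE_JS0 << js);
}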
 
 struct kbase_sub_alloc {
@@ -1375,12 +1758,233 @@
 	DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K);
 };
 
+
+/**
+ * struct kbase_context - Object representing an entity, among which GPU is
+ *                        scheduled and gets its own GPU address space.
+ *                        Created when the device file /dev/malixx is opened.
+ * @filp:                 Pointer to the struct file corresponding to device file
+ *                        /dev/malixx instance, passed to the file's open method.
+ * @kbdev:                Pointer to the Kbase device for which the context is created.
+ * @mmu:                  Structure holding details of the MMU tables for this
+ *                        context
+ * @id:                   Unique identifier for the context, indicates the number of
+ *                        contexts which have been created for the device so far.
+ * @api_version:          contains the version number for User/kernel interface,
+ *                        used for compatibility check.
+ * @event_list:           list of posted events about completed atoms, to be sent to
+ *                        the event handling thread of Userspace.
+ * @event_coalesce_list:  list containing events corresponding to successive atoms
+ *                        which have requested deferred delivery of the completion
+ *                        events to Userspace.
+ * @event_mutex:          Lock to protect the concurrent access to @event_list &
+ *                        @event_coalesce_list.
+ * @event_closed:         Flag set through POST_TERM ioctl, indicates that Driver
+ *                        should stop posting events and also inform event handling
+ *                        thread that context termination is in progress.
+ * @event_workq:          Workqueue for processing work items corresponding to atoms
+ *                        that do not return an event to Userspace or have to perform
+ *                        a replay job
+ * @event_count:          Count of the posted events to be consumed by Userspace.
+ * @event_coalesce_count: Count of the events present in @event_coalesce_list.
+ * @flags:                bitmap of enums from kbase_context_flags, indicating the
+ *                        state & attributes for the context.
+ * @setup_complete:       Indicates if the setup for context has completed, i.e.
+ *                        flags have been set for the context. Driver allows only
+ *                        2 ioctls until the setup is done. Valid only for
+ *                        @api_version value 0.
+ * @setup_in_progress:    Indicates if the context's setup is in progress and other
+ *                        setup calls made during that period shall be rejected.
+ * @aliasing_sink_page:   Special page used for KBASE_MEM_TYPE_ALIAS allocations,
+ *                        which can alias a number of memory regions. The page is
+ *                        used to represent a region where it is mapped with a write-alloc
+ *                        cache setup, typically used when the write result of the
+ *                        GPU isn't needed, but the GPU must write anyway.
+ * @mem_partials_lock:    Lock for protecting the operations done on the elements
+ *                        added to @mem_partials list.
+ * @mem_partials:         List head for the list of large pages, 2MB in size, which
+ *                        have been split into 4 KB pages and are used
+ *                        partially for the allocations >= 2 MB in size.
+ * @reg_lock:             Lock used for GPU virtual address space management operations,
+ *                        like adding/freeing a memory region in the address space.
+ *                        Could possibly be converted to a rwlock in the future.
+ * @reg_rbtree_same:      RB tree of the memory regions allocated from the SAME_VA
+ *                        zone of the GPU virtual address space. Used for allocations
+ *                        having the same value for GPU & CPU virtual address.
+ * @reg_rbtree_custom:    RB tree of the memory regions allocated from the CUSTOM_VA
+ *                        zone of the GPU virtual address space.
+ * @cookies:              Bitmask of BITS_PER_LONG bits, used mainly for
+ *                        SAME_VA allocations to defer the reservation of memory region
+ *                        (from the GPU virtual address space) from base_mem_alloc
+ *                        ioctl to mmap system call. This helps returning unique
+ *                        handles, disguised as GPU VA, to Userspace from base_mem_alloc
+ *                        and later retrieving the pointer to memory region structure
+ *                        in the mmap handler.
+ * @pending_regions:      Array containing pointers to memory region structures,
+ *                        used in conjunction with @cookies bitmask mainly for
+ *                        providing a mechanism to have the same value for CPU &
+ *                        GPU virtual address.
+ * @event_queue:          Wait queue used for blocking the thread, which consumes
+ *                        the base_jd_event corresponding to an atom, when there
+ *                        are no more posted events.
+ * @tgid:                 thread group id of the process, whose thread opened the
+ *                        device file /dev/malixx instance to create a context.
+ * @pid:                  id of the thread, corresponding to process @tgid, which
+ *                        actually opened the device file.
+ * @jctx:                 object encapsulating all the Job dispatcher related state,
+ *                        including the array of atoms.
+ * @used_pages:           Keeps track of the number of 4KB physical pages in use
+ *                        for the context.
+ * @nonmapped_pages:      Updated in the same way as @used_pages, except for the case
+ *                        when special tracking page is freed by userspace where it
+ *                        is reset to 0.
+ * @permanent_mapped_pages: Usage count of permanently mapped memory
+ * @mem_pool:             Object containing the state for the context specific pool of
+ *                        4KB size physical pages.
+ * @lp_mem_pool:          Object containing the state for the context specific pool of
+ *                        2MB size physical pages.
+ * @reclaim:              Shrinker object registered with the kernel containing
+ *                        the pointer to callback function which is invoked under
+ *                        low memory conditions. In the callback function Driver
+ *                        frees up the memory for allocations marked as
+ *                        evictable/reclaimable.
+ * @evict_list:           List head for the list containing the allocations which
+ *                        can be evicted or freed up in the shrinker callback.
+ * @waiting_soft_jobs:    List head for the list containing softjob atoms, which
+ *                        are either waiting for the event set operation, or waiting
+ *                        for the signaling of input fence or waiting for the GPU
+ *                        device to be powered on so as to dump the CPU/GPU timestamps.
+ * @waiting_soft_jobs_lock: Lock to protect @waiting_soft_jobs list from concurrent
+ *                        accesses.
+ * @dma_fence:            Object containing list head for the list of dma-buf fence
+ *                        waiting atoms and the waitqueue to process the work item
+ *                        queued for the atoms blocked on the signaling of dma-buf
+ *                        fences.
+ * @as_nr:                id of the address space being used for the scheduled in
+ *                        context. This is effectively part of the Run Pool, because
+ *                        it only has a valid setting (!=KBASEP_AS_NR_INVALID) whilst
+ *                        the context is scheduled in. The hwaccess_lock must be held
+ *                        whilst accessing this.
+ *                        If the context relating to this value of as_nr is required,
+ *                        then the context must be retained to ensure that it doesn't
+ *                        disappear whilst it is being used. Alternatively, hwaccess_lock
+ *                        can be held to ensure the context doesn't disappear (but this
+ *                        has restrictions on what other locks can be taken simultaneously).
+ * @refcount:             Keeps track of the number of users of this context. A user
+ *                        can be a job that is available for execution, instrumentation
+ *                        needing to 'pin' a context for counter collection, etc.
+ *                        If the refcount reaches 0 then this context is considered
+ *                        inactive and the previously programmed AS might be cleared
+ *                        at any point.
+ *                        Generally the reference count is incremented when the context
+ *                        is scheduled in and an atom is pulled from the context's per
+ *                        slot runnable tree.
+ * @mm_update_lock:       lock used for handling of special tracking page.
+ * @process_mm:           Pointer to the memory descriptor of the process which
+ *                        created the context. Used for accounting the physical
+ *                        pages used for GPU allocations, done for the context,
+ *                        to the memory consumed by the process.
+ * @same_va_end:          End address of the SAME_VA zone (in 4KB page units)
+ * @timeline:             Object tracking the number of atoms currently in flight for
+ *                        the context and thread group id of the process, i.e. @tgid.
+ * @mem_profile_data:     Buffer containing the profiling information provided by
+ *                        Userspace, can be read through the mem_profile debugfs file.
+ * @mem_profile_size:     Size of the @mem_profile_data.
+ * @mem_profile_lock:     Lock to serialize the operations related to mem_profile
+ *                        debugfs file.
+ * @kctx_dentry:          Pointer to the debugfs directory created for every context,
+ *                        inside kbase_device::debugfs_ctx_directory, containing
+ *                        context specific files.
+ * @reg_dump:             Buffer containing a register offset & value pair, used
+ *                        for dumping job fault debug info.
+ * @job_fault_count:      Indicates that a job fault occurred for the context and
+ *                        dumping of its debug info is in progress.
+ * @job_fault_resume_event_list: List containing atoms completed after the faulty
+ *                        atom but before the debug data for faulty atom was dumped.
+ * @jsctx_queue:          Per slot & priority arrays of object containing the root
+ *                        of RB-tree holding currently runnable atoms on the job slot
+ *                        and the head item of the linked list of atoms blocked on
+ *                        cross-slot dependencies.
+ * @atoms_pulled:         Total number of atoms currently pulled from the context.
+ * @atoms_pulled_slot:    Per slot count of the number of atoms currently pulled
+ *                        from the context.
+ * @atoms_pulled_slot_pri: Per slot & priority count of the number of atoms currently
+ *                        pulled from the context. hwaccess_lock shall be held when
+ *                        accessing it.
+ * @blocked_js:           Indicates if the context is blocked from submitting atoms
+ *                        on a slot at a given priority. This is set to true when
+ *                        the atom corresponding to the context is soft/hard stopped or
+ *                        removed from the HEAD_NEXT register in response to
+ *                        soft/hard stop.
+ * @slots_pullable:       Bitmask of slots, indicating the slots for which the
+ *                        context has pullable atoms in the runnable tree.
+ * @work:                 Work structure used for deferred ASID assignment.
+ * @vinstr_cli:           Pointer to the legacy userspace vinstr client, there can
+ *                        be only one such client per kbase context.
+ * @vinstr_cli_lock:      Lock used for the vinstr ioctl calls made for @vinstr_cli.
+ * @completed_jobs:       List containing completed atoms for which base_jd_event is
+ *                        to be posted.
+ * @work_count:           Number of work items, corresponding to atoms, currently
+ *                        pending on job_done workqueue of @jctx.
+ * @soft_job_timeout:     Timer object used for failing/cancelling waiting
+ *                        soft-jobs which have been blocked for longer than
+ *                        the soft-job timeout value.
+ * @jit_alloc:            Array of 256 pointers to GPU memory regions, used
+ *                        for JIT allocations.
+ * @jit_max_allocations:  Maximum number of JIT allocations allowed at once.
+ * @jit_current_allocations: Current number of in-flight JIT allocations.
+ * @jit_current_allocations_per_bin: Current number of in-flight JIT allocations per bin
+ * @jit_version:          Version number indicating whether userspace is using
+ *                        the old or new version of the interface for JIT allocations:
+ *	                  1 -> client used KBASE_IOCTL_MEM_JIT_INIT_OLD
+ *	                  2 -> client used KBASE_IOCTL_MEM_JIT_INIT
+ * @jit_active_head:      List containing the JIT allocations which are in use.
+ * @jit_pool_head:        List containing the JIT allocations which have been
+ *                        freed up by userspace and so are no longer in use by it.
+ *                        Driver caches them to quickly fulfill requests for new
+ *                        JIT allocations. They are released in case of memory
+ *                        pressure as they are put on the @evict_list when they
+ *                        are freed up by userspace.
+ * @jit_destroy_head:     List containing the JIT allocations which were moved to it
+ *                        from @jit_pool_head, in the shrinker callback, after freeing
+ *                        their backing physical pages.
+ * @jit_evict_lock:       Lock used for operations done on JIT allocations and also
+ *                        for accessing @evict_list.
+ * @jit_work:             Work item queued to defer the freeing of memory region when
+ *                        JIT allocation is moved to @jit_destroy_head.
+ * @jit_atoms_head:       A list of the JIT soft-jobs, both alloc & free, in submission
+ *                        order, protected by kbase_jd_context.lock.
+ * @jit_pending_alloc:    A list of JIT alloc soft-jobs for which allocation will be
+ *                        reattempted after the impending free of other active JIT
+ *                        allocations.
+ * @ext_res_meta_head:    A list of sticky external resources which were requested
+ *                        to be mapped on the GPU side, through a soft-job atom of
+ *                        type EXT_RES_MAP or the STICKY_RESOURCE_MAP ioctl.
+ * @drain_pending:        Used to record that a flush/invalidate of the GPU caches was
+ *                        requested from atomic context, so that the next flush request
+ *                        can wait for the flush of GPU writes.
+ * @age_count:            Counter incremented on every call to jd_submit_atom;
+ *                        each atom is assigned a snapshot of this counter, which
+ *                        is used to determine the atom's age when it is added to
+ *                        the runnable RB-tree.
+ * @trim_level:           Level of JIT allocation trimming to perform on free (0-100%)
+ * @gwt_enabled:          Indicates if tracking of GPU writes is enabled, protected by
+ *                        kbase_context.reg_lock.
+ * @gwt_was_enabled:      Simple sticky bit flag to know if GWT was ever enabled.
+ * @gwt_current_list:     A list of addresses for which GPU has generated write faults,
+ *                        after the last snapshot of it was sent to userspace.
+ * @gwt_snapshot_list:    Snapshot of the @gwt_current_list for sending to user space.
+ * @priority:             Indicates the context priority. Used along with @atoms_count
+ *                        for context scheduling, protected by hwaccess_lock.
+ * @atoms_count:          Number of GPU atoms currently in use, per priority.
+ */
 struct kbase_context {
 	struct file *filp;
 	struct kbase_device *kbdev;
-	u32 id; /* System wide unique id */
+	struct kbase_mmu_table mmu;
+
+	u32 id;
 	unsigned long api_version;
-	phys_addr_t pgd;
 	struct list_head event_list;
 	struct list_head event_coalesce_list;
 	struct mutex event_mutex;
@@ -1394,21 +1998,15 @@
 	atomic_t                setup_complete;
 	atomic_t                setup_in_progress;
 
-	u64 *mmu_teardown_pages;
-
 	struct tagged_addr aliasing_sink_page;
 
-	struct mutex            mem_partials_lock;
+	spinlock_t              mem_partials_lock;
 	struct list_head        mem_partials;
 
-	struct mutex            mmu_lock;
-	struct mutex            reg_lock; /* To be converted to a rwlock? */
-	struct rb_root reg_rbtree_same; /* RB tree of GPU (live) regions,
-					 * SAME_VA zone */
-	struct rb_root reg_rbtree_exec; /* RB tree of GPU (live) regions,
-					 * EXEC zone */
-	struct rb_root reg_rbtree_custom; /* RB tree of GPU (live) regions,
-					 * CUSTOM_VA zone */
+	struct mutex            reg_lock;
+	struct rb_root reg_rbtree_same;
+	struct rb_root reg_rbtree_custom;
+
 
 	unsigned long    cookies;
 	struct kbase_va_region *pending_regions[BITS_PER_LONG];
@@ -1420,6 +2018,7 @@
 	struct kbase_jd_context jctx;
 	atomic_t used_pages;
 	atomic_t         nonmapped_pages;
+	unsigned long permanent_mapped_pages;
 
 	struct kbase_mem_pool mem_pool;
 	struct kbase_mem_pool lp_mem_pool;
@@ -1435,26 +2034,12 @@
 		struct workqueue_struct *wq;
 	} dma_fence;
 #endif /* CONFIG_MALI_DMA_FENCE */
-	/** This is effectively part of the Run Pool, because it only has a valid
-	 * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
-	 *
-	 * The hwaccess_lock must be held whilst accessing this.
-	 *
-	 * If the context relating to this as_nr is required, you must use
-	 * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
-	 * whilst you're using it. Alternatively, just hold the hwaccess_lock
-	 * to ensure the context doesn't disappear (but this has restrictions on what other locks
-	 * you can take whilst doing this) */
+
 	int as_nr;
 
-	/* Keeps track of the number of users of this context. A user can be a
-	 * job that is available for execution, instrumentation needing to 'pin'
-	 * a context for counter collection, etc. If the refcount reaches 0 then
-	 * this context is considered inactive and the previously programmed
-	 * AS might be cleared at any point.
-	 */
 	atomic_t refcount;
 
+
 	/* NOTE:
 	 *
 	 * Flags are in jctx.sched_info.ctx.flags
@@ -1462,29 +2047,17 @@
 	 *
 	 * All other flags must be added there */
 	spinlock_t         mm_update_lock;
-	struct mm_struct *process_mm;
-	/* End of the SAME_VA zone */
+	struct mm_struct __rcu *process_mm;
 	u64 same_va_end;
 
-#ifdef CONFIG_MALI_TRACE_TIMELINE
-	struct kbase_trace_kctx_timeline timeline;
-#endif
 #ifdef CONFIG_DEBUG_FS
-	/* Content of mem_profile file */
 	char *mem_profile_data;
-	/* Size of @c mem_profile_data */
 	size_t mem_profile_size;
-	/* Mutex guarding memory profile state */
 	struct mutex mem_profile_lock;
-	/* Memory profile directory under debugfs */
 	struct dentry *kctx_dentry;
 
-	/* for job fault debug */
 	unsigned int *reg_dump;
 	atomic_t job_fault_count;
-	/* This list will keep the following atoms during the dump
-	 * in the same context
-	 */
 	struct list_head job_fault_resume_event_list;
 
 #endif /* CONFIG_DEBUG_FS */
@@ -1492,86 +2065,59 @@
 	struct jsctx_queue jsctx_queue
 		[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
 
-	/* Number of atoms currently pulled from this context */
 	atomic_t atoms_pulled;
-	/* Number of atoms currently pulled from this context, per slot */
 	atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
-	/* Number of atoms currently pulled from this context, per slot and
-	 * priority. Hold hwaccess_lock when accessing */
 	int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][
 			KBASE_JS_ATOM_SCHED_PRIO_COUNT];
 
-	/* true if slot is blocked on the given priority. This will be set on a
-	 * soft-stop */
 	bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
 
-	/* Bitmask of slots that can be pulled from */
 	u32 slots_pullable;
 
-	/* Backend specific data */
-	struct kbase_context_backend backend;
-
-	/* Work structure used for deferred ASID assignment */
 	struct work_struct work;
 
-	/* Only one userspace vinstr client per kbase context */
 	struct kbase_vinstr_client *vinstr_cli;
 	struct mutex vinstr_cli_lock;
 
-	/* List of completed jobs waiting for events to be posted */
 	struct list_head completed_jobs;
-	/* Number of work items currently pending on job_done_wq */
 	atomic_t work_count;
 
-	/* Waiting soft-jobs will fail when this timer expires */
 	struct timer_list soft_job_timeout;
 
-	/* JIT allocation management */
 	struct kbase_va_region *jit_alloc[256];
+	u8 jit_max_allocations;
+	u8 jit_current_allocations;
+	u8 jit_current_allocations_per_bin[256];
+	u8 jit_version;
 	struct list_head jit_active_head;
 	struct list_head jit_pool_head;
 	struct list_head jit_destroy_head;
 	struct mutex jit_evict_lock;
 	struct work_struct jit_work;
 
-	/* A list of the JIT soft-jobs in submission order
-	 * (protected by kbase_jd_context.lock)
-	 */
 	struct list_head jit_atoms_head;
-	/* A list of pending JIT alloc soft-jobs (using the 'queue' list_head)
-	 * (protected by kbase_jd_context.lock)
-	 */
 	struct list_head jit_pending_alloc;
 
-	/* External sticky resource management */
 	struct list_head ext_res_meta_head;
 
-	/* Used to record that a drain was requested from atomic context */
 	atomic_t drain_pending;
 
-	/* Current age count, used to determine age for newly submitted atoms */
 	u32 age_count;
 
+	u8 trim_level;
+
 #ifdef CONFIG_MALI_JOB_DUMP
-	/* Used for tracking GPU writes.
-	 * (protected by kbase_context.reg_lock)
-	 */
 	bool gwt_enabled;
 
-	/* Simple sticky bit flag to know if GWT was ever enabled
-	 * (protected by kbase_context.reg_lock)
-	 */
 	bool gwt_was_enabled;
 
-	/* Current list of GPU writes.
-	 * (protected by kbase_context.reg_lock)
-	 */
 	struct list_head gwt_current_list;
 
-	 /* Snapshot of list of GPU writes for sending to user space. */
 	struct list_head gwt_snapshot_list;
-
 #endif
+
+	int priority;
+	s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
 };
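
The @jit_pool_head / @jit_destroy_head pair documented above is essentially a
reuse cache with deferred teardown: freed allocations are parked for quick
reuse and only handed to the destroy list (and @jit_work) under memory
pressure. A minimal sketch of that flow, assuming a hypothetical jit_node list
member on struct kbase_va_region and ignoring locking, the per-bin limits and
the @evict_list interaction:

/* Illustrative sketch only - the jit_node member and helper names are
 * assumptions, not driver API. Assumes <linux/list.h> and
 * <linux/workqueue.h>.
 */
static struct kbase_va_region *jit_get_sketch(struct kbase_context *kctx)
{
	struct kbase_va_region *reg;

	if (list_empty(&kctx->jit_pool_head))
		return NULL;	/* caller falls back to a fresh allocation */

	/* Reuse a cached allocation that userspace freed earlier. */
	reg = list_first_entry(&kctx->jit_pool_head,
			       struct kbase_va_region, jit_node);
	list_move(&reg->jit_node, &kctx->jit_active_head);
	return reg;
}

static void jit_shrink_one_sketch(struct kbase_context *kctx)
{
	struct kbase_va_region *reg;

	if (list_empty(&kctx->jit_pool_head))
		return;

	/* Under memory pressure: hand the region to the deferred freer. */
	reg = list_first_entry(&kctx->jit_pool_head,
			       struct kbase_va_region, jit_node);
	list_move(&reg->jit_node, &kctx->jit_destroy_head);
	queue_work(system_wq, &kctx->jit_work);
}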
 
 #ifdef CONFIG_MALI_JOB_DUMP
@@ -1579,17 +2125,17 @@
  * struct kbasep_gwt_list_element - Structure used to collect GPU
  *                                  write faults.
  * @link:                           List head for adding write faults.
- * @handle:                         The handle for the modified region.
- * @offset:                         The offset in pages of the modified
- *                                  part of the region.
+ * @region:                         The GPU memory region containing the
+ *                                  faulting page address.
+ * @page_addr:                      Page address where GPU write fault occurred.
  * @num_pages:                      The number of pages modified.
  *
  * Using this structure all GPU write faults are stored in a list.
  */
 struct kbasep_gwt_list_element {
 	struct list_head link;
-	u64 handle;
-	u64 offset;
+	struct kbase_va_region *region;
+	u64 page_addr;
 	u64 num_pages;
 };
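
Entries of this type are collated in mali_kbase_gwt.c (see the
kbase_gpu_gwt_collate() changes later in this patch): the snapshot list is
sorted by @page_addr and neighbouring entries are merged when they describe
contiguous pages of the same @region. A sketch of that merge step, assuming
the list is already sorted and omitting locking and allocation details:

/* Illustrative sketch: merge an entry into the running one when it belongs
 * to the same region and starts exactly where the previous entry ends.
 */
static void gwt_collate_sketch(struct list_head *sorted_list)
{
	struct kbasep_gwt_list_element *pos, *n, *collated = NULL;

	list_for_each_entry_safe(pos, n, sorted_list, link) {
		if (collated && collated->region == pos->region &&
		    collated->page_addr + collated->num_pages * PAGE_SIZE ==
		    pos->page_addr) {
			/* Contiguous with the running entry: extend it. */
			collated->num_pages += pos->num_pages;
			list_del(&pos->link);
			kfree(pos);
		} else {
			/* New region or a gap: start a new running entry. */
			collated = pos;
		}
	}
}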
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c
index d294042..804cf3f 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_device.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -225,14 +225,6 @@
 
 	mutex_init(&kbdev->cacheclean_lock);
 
-#ifdef CONFIG_MALI_TRACE_TIMELINE
-	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
-		kbdev->timeline.slot_atoms_submitted[i] = 0;
-
-	for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
-		atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
-#endif /* CONFIG_MALI_TRACE_TIMELINE */
-
 	/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
 	for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
 		kbdev->kbase_profiling_controls[i] = 0;
@@ -254,10 +246,6 @@
 	else
 		kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
 
-#ifdef CONFIG_MALI_DEBUG
-	init_waitqueue_head(&kbdev->driver_inactive_wait);
-#endif /* CONFIG_MALI_DEBUG */
-
 	return 0;
 term_trace:
 	kbasep_trace_term(kbdev);
@@ -289,91 +277,6 @@
 	kfree(kbdev);
 }
 
-int kbase_device_trace_buffer_install(
-		struct kbase_context *kctx, u32 *tb, size_t size)
-{
-	unsigned long flags;
-
-	KBASE_DEBUG_ASSERT(kctx);
-	KBASE_DEBUG_ASSERT(tb);
-
-	/* Interface uses 16-bit value to track last accessed entry. Each entry
-	 * is composed of two 32-bit words.
-	 * This limits the size that can be handled without an overflow. */
-	if (0xFFFF * (2 * sizeof(u32)) < size)
-		return -EINVAL;
-
-	/* set up the header */
-	/* magic number in the first 4 bytes */
-	tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
-	/* Store (write offset = 0, wrap counter = 0, transaction active = no)
-	 * write offset 0 means never written.
-	 * Offsets 1 to (wrap_offset - 1) used to store values when trace started
-	 */
-	tb[1] = 0;
-
-	/* install trace buffer */
-	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
-	kctx->jctx.tb_wrap_offset = size / 8;
-	kctx->jctx.tb = tb;
-	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
-
-	return 0;
-}
-
-void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
-{
-	unsigned long flags;
-
-	KBASE_DEBUG_ASSERT(kctx);
-	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
-	kctx->jctx.tb = NULL;
-	kctx->jctx.tb_wrap_offset = 0;
-	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
-}
-
-void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
-	if (kctx->jctx.tb) {
-		u16 wrap_count;
-		u16 write_offset;
-		u32 *tb = kctx->jctx.tb;
-		u32 header_word;
-
-		header_word = tb[1];
-		KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
-
-		wrap_count = (header_word >> 1) & 0x7FFF;
-		write_offset = (header_word >> 16) & 0xFFFF;
-
-		/* mark as transaction in progress */
-		tb[1] |= 0x1;
-		mb();
-
-		/* calculate new offset */
-		write_offset++;
-		if (write_offset == kctx->jctx.tb_wrap_offset) {
-			/* wrap */
-			write_offset = 1;
-			wrap_count++;
-			wrap_count &= 0x7FFF;	/* 15bit wrap counter */
-		}
-
-		/* store the trace entry at the selected offset */
-		tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
-		tb[write_offset * 2 + 1] = reg_value;
-		mb();
-
-		/* new header word */
-		header_word = (write_offset << 16) | (wrap_count << 1) | 0x0;	/* transaction complete */
-		tb[1] = header_word;
-	}
-	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
-}
-
 /*
  * Device trace functions
  */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c
index e290fce..3c9cef3 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_event.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -38,8 +38,6 @@
 
 	data = katom->udata;
 
-	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
-
 	KBASE_TLSTREAM_TL_NRET_ATOM_CTX(katom, kctx);
 	KBASE_TLSTREAM_TL_DEL_ATOM(katom);
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_fence.h b/drivers/gpu/arm/midgard/mali_kbase_fence.h
index 865060d..d7a65e0 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_fence.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_fence.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -139,11 +139,16 @@
 static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom,
 					 int status)
 {
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
-	katom->dma_fence.fence->error = status;
+	if (status) {
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
+	  KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)
+		fence_set_error(katom->dma_fence.fence, status);
+#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE)
+		dma_fence_set_error(katom->dma_fence.fence, status);
 #else
-	katom->dma_fence.fence->status = status;
+		katom->dma_fence.fence->status = status;
 #endif
+	}
 	return dma_fence_signal(katom->dma_fence.fence);
 }
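
kbase_fence_out_signal() now folds the atom's completion status into the fence
before signalling it, using whichever error-setting interface the running
kernel provides. A consumer can read that status back with
dma_fence_get_status() (or the compatibility macro added to
mali_kbase_fence_defs.h later in this patch). A hedged sketch of interpreting
its tri-state result, assuming a 4.11+ kernel; the helper name is illustrative:

/* dma_fence_get_status() returns 0 while the fence is still pending, 1 once
 * it has signalled successfully, and a negative errno once it has signalled
 * with an error.
 */
static const char *fence_state_str(struct dma_fence *fence)
{
	int status = dma_fence_get_status(fence);

	if (status == 0)
		return "pending";
	if (status == 1)
		return "signalled";
	return "signalled with error";	/* status holds a negative errno */
}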
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h b/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h
index b629a88..607a95c 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -44,7 +44,12 @@
 #define dma_fence_is_signaled(a) fence_is_signaled(a)
 #define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c)
 #define dma_fence_remove_callback(a, b) fence_remove_callback(a, b)
+
+#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0)
+#else
 #define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0)
+#endif
 
 #else
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
index 2fa6806..7077c3a 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -73,10 +73,26 @@
 			hardware_counters = hardware_counters_mali_tSIx;
 			count = ARRAY_SIZE(hardware_counters_mali_tSIx);
 			break;
+		case GPU_ID2_PRODUCT_TDVX:
+			hardware_counters = hardware_counters_mali_tSIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tSIx);
+			break;
 		case GPU_ID2_PRODUCT_TNOX:
 			hardware_counters = hardware_counters_mali_tNOx;
 			count = ARRAY_SIZE(hardware_counters_mali_tNOx);
 			break;
+		case GPU_ID2_PRODUCT_TGOX:
+			hardware_counters = hardware_counters_mali_tGOx;
+			count = ARRAY_SIZE(hardware_counters_mali_tGOx);
+			break;
+		case GPU_ID2_PRODUCT_TKAX:
+			hardware_counters = hardware_counters_mali_tKAx;
+			count = ARRAY_SIZE(hardware_counters_mali_tKAx);
+			break;
+		case GPU_ID2_PRODUCT_TTRX:
+			hardware_counters = hardware_counters_mali_tTRx;
+			count = ARRAY_SIZE(hardware_counters_mali_tTRx);
+			break;
 		default:
 			hardware_counters = NULL;
 			count = 0;
@@ -158,7 +174,7 @@
 struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
 {
 	struct kbase_gator_hwcnt_handles *hand;
-	struct kbase_uk_hwcnt_reader_setup setup;
+	struct kbase_ioctl_hwcnt_reader_setup setup;
 	uint32_t dump_size = 0, i = 0;
 
 	if (!in_out_info)
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
index b048db8..5d38c7b 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
@@ -2169,6 +2169,8 @@
 
 #include "mali_kbase_gator_hwcnt_names_tnox.h"
 
+#include "mali_kbase_gator_hwcnt_names_tgox.h"
+
 #include "mali_kbase_gator_hwcnt_names_tkax.h"
 
 #include "mali_kbase_gator_hwcnt_names_ttrx.h"
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tgox.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tgox.h
new file mode 100644
index 0000000..72b5266
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tgox.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TGOX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TGOX_H_
+
+static const char * const hardware_counters_mali_tGOx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_MESSAGES_SENT",
+	"TGOx_MESSAGES_RECEIVED",
+	"TGOx_GPU_ACTIVE",
+	"TGOx_IRQ_ACTIVE",
+	"TGOx_JS0_JOBS",
+	"TGOx_JS0_TASKS",
+	"TGOx_JS0_ACTIVE",
+	"",
+	"TGOx_JS0_WAIT_READ",
+	"TGOx_JS0_WAIT_ISSUE",
+	"TGOx_JS0_WAIT_DEPEND",
+	"TGOx_JS0_WAIT_FINISH",
+	"TGOx_JS1_JOBS",
+	"TGOx_JS1_TASKS",
+	"TGOx_JS1_ACTIVE",
+	"",
+	"TGOx_JS1_WAIT_READ",
+	"TGOx_JS1_WAIT_ISSUE",
+	"TGOx_JS1_WAIT_DEPEND",
+	"TGOx_JS1_WAIT_FINISH",
+	"TGOx_JS2_JOBS",
+	"TGOx_JS2_TASKS",
+	"TGOx_JS2_ACTIVE",
+	"",
+	"TGOx_JS2_WAIT_READ",
+	"TGOx_JS2_WAIT_ISSUE",
+	"TGOx_JS2_WAIT_DEPEND",
+	"TGOx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_TILER_ACTIVE",
+	"TGOx_JOBS_PROCESSED",
+	"TGOx_TRIANGLES",
+	"TGOx_LINES",
+	"TGOx_POINTS",
+	"TGOx_FRONT_FACING",
+	"TGOx_BACK_FACING",
+	"TGOx_PRIM_VISIBLE",
+	"TGOx_PRIM_CULLED",
+	"TGOx_PRIM_CLIPPED",
+	"TGOx_PRIM_SAT_CULLED",
+	"TGOx_BIN_ALLOC_INIT",
+	"TGOx_BIN_ALLOC_OVERFLOW",
+	"TGOx_BUS_READ",
+	"",
+	"TGOx_BUS_WRITE",
+	"TGOx_LOADING_DESC",
+	"TGOx_IDVS_POS_SHAD_REQ",
+	"TGOx_IDVS_POS_SHAD_WAIT",
+	"TGOx_IDVS_POS_SHAD_STALL",
+	"TGOx_IDVS_POS_FIFO_FULL",
+	"TGOx_PREFETCH_STALL",
+	"TGOx_VCACHE_HIT",
+	"TGOx_VCACHE_MISS",
+	"TGOx_VCACHE_LINE_WAIT",
+	"TGOx_VFETCH_POS_READ_WAIT",
+	"TGOx_VFETCH_VERTEX_WAIT",
+	"TGOx_VFETCH_STALL",
+	"TGOx_PRIMASSY_STALL",
+	"TGOx_BBOX_GEN_STALL",
+	"TGOx_IDVS_VBU_HIT",
+	"TGOx_IDVS_VBU_MISS",
+	"TGOx_IDVS_VBU_LINE_DEALLOCATE",
+	"TGOx_IDVS_VAR_SHAD_REQ",
+	"TGOx_IDVS_VAR_SHAD_STALL",
+	"TGOx_BINNER_STALL",
+	"TGOx_ITER_STALL",
+	"TGOx_COMPRESS_MISS",
+	"TGOx_COMPRESS_STALL",
+	"TGOx_PCACHE_HIT",
+	"TGOx_PCACHE_MISS",
+	"TGOx_PCACHE_MISS_STALL",
+	"TGOx_PCACHE_EVICT_STALL",
+	"TGOx_PMGR_PTR_WR_STALL",
+	"TGOx_PMGR_PTR_RD_STALL",
+	"TGOx_PMGR_CMD_WR_STALL",
+	"TGOx_WRBUF_ACTIVE",
+	"TGOx_WRBUF_HIT",
+	"TGOx_WRBUF_MISS",
+	"TGOx_WRBUF_NO_FREE_LINE_STALL",
+	"TGOx_WRBUF_NO_AXI_ID_STALL",
+	"TGOx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TGOx_UTLB_TRANS",
+	"TGOx_UTLB_TRANS_HIT",
+	"TGOx_UTLB_TRANS_STALL",
+	"TGOx_UTLB_TRANS_MISS_DELAY",
+	"TGOx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_FRAG_ACTIVE",
+	"TGOx_FRAG_PRIMITIVES",
+	"TGOx_FRAG_PRIM_RAST",
+	"TGOx_FRAG_FPK_ACTIVE",
+	"TGOx_FRAG_STARVING",
+	"TGOx_FRAG_WARPS",
+	"TGOx_FRAG_PARTIAL_WARPS",
+	"TGOx_FRAG_QUADS_RAST",
+	"TGOx_FRAG_QUADS_EZS_TEST",
+	"TGOx_FRAG_QUADS_EZS_UPDATE",
+	"TGOx_FRAG_QUADS_EZS_KILL",
+	"TGOx_FRAG_LZS_TEST",
+	"TGOx_FRAG_LZS_KILL",
+	"TGOx_WARP_REG_SIZE_64",
+	"TGOx_FRAG_PTILES",
+	"TGOx_FRAG_TRANS_ELIM",
+	"TGOx_QUAD_FPK_KILLER",
+	"TGOx_FULL_QUAD_WARPS",
+	"TGOx_COMPUTE_ACTIVE",
+	"TGOx_COMPUTE_TASKS",
+	"TGOx_COMPUTE_WARPS",
+	"TGOx_COMPUTE_STARVING",
+	"TGOx_EXEC_CORE_ACTIVE",
+	"TGOx_EXEC_ACTIVE",
+	"TGOx_EXEC_INSTR_COUNT",
+	"TGOx_EXEC_INSTR_DIVERGED",
+	"TGOx_EXEC_INSTR_STARVING",
+	"TGOx_ARITH_INSTR_SINGLE_FMA",
+	"TGOx_ARITH_INSTR_DOUBLE",
+	"TGOx_ARITH_INSTR_MSG",
+	"TGOx_ARITH_INSTR_MSG_ONLY",
+	"TGOx_TEX_MSGI_NUM_QUADS",
+	"TGOx_TEX_DFCH_NUM_PASSES",
+	"TGOx_TEX_DFCH_NUM_PASSES_MISS",
+	"TGOx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TGOx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TGOx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TGOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TGOx_TEX_TFCH_NUM_OPERATIONS",
+	"TGOx_TEX_FILT_NUM_OPERATIONS",
+	"TGOx_LS_MEM_READ_FULL",
+	"TGOx_LS_MEM_READ_SHORT",
+	"TGOx_LS_MEM_WRITE_FULL",
+	"TGOx_LS_MEM_WRITE_SHORT",
+	"TGOx_LS_MEM_ATOMIC",
+	"TGOx_VARY_INSTR",
+	"TGOx_VARY_SLOT_32",
+	"TGOx_VARY_SLOT_16",
+	"TGOx_ATTR_INSTR",
+	"TGOx_ARITH_INSTR_FP_MUL",
+	"TGOx_BEATS_RD_FTC",
+	"TGOx_BEATS_RD_FTC_EXT",
+	"TGOx_BEATS_RD_LSC",
+	"TGOx_BEATS_RD_LSC_EXT",
+	"TGOx_BEATS_RD_TEX",
+	"TGOx_BEATS_RD_TEX_EXT",
+	"TGOx_BEATS_RD_OTHER",
+	"TGOx_BEATS_WR_LSC_WB",
+	"TGOx_BEATS_WR_TIB",
+	"TGOx_BEATS_WR_LSC_OTHER",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TGOx_L2_RD_MSG_IN",
+	"TGOx_L2_RD_MSG_IN_STALL",
+	"TGOx_L2_WR_MSG_IN",
+	"TGOx_L2_WR_MSG_IN_STALL",
+	"TGOx_L2_SNP_MSG_IN",
+	"TGOx_L2_SNP_MSG_IN_STALL",
+	"TGOx_L2_RD_MSG_OUT",
+	"TGOx_L2_RD_MSG_OUT_STALL",
+	"TGOx_L2_WR_MSG_OUT",
+	"TGOx_L2_ANY_LOOKUP",
+	"TGOx_L2_READ_LOOKUP",
+	"TGOx_L2_WRITE_LOOKUP",
+	"TGOx_L2_EXT_SNOOP_LOOKUP",
+	"TGOx_L2_EXT_READ",
+	"TGOx_L2_EXT_READ_NOSNP",
+	"TGOx_L2_EXT_READ_UNIQUE",
+	"TGOx_L2_EXT_READ_BEATS",
+	"TGOx_L2_EXT_AR_STALL",
+	"TGOx_L2_EXT_AR_CNT_Q1",
+	"TGOx_L2_EXT_AR_CNT_Q2",
+	"TGOx_L2_EXT_AR_CNT_Q3",
+	"TGOx_L2_EXT_RRESP_0_127",
+	"TGOx_L2_EXT_RRESP_128_191",
+	"TGOx_L2_EXT_RRESP_192_255",
+	"TGOx_L2_EXT_RRESP_256_319",
+	"TGOx_L2_EXT_RRESP_320_383",
+	"TGOx_L2_EXT_WRITE",
+	"TGOx_L2_EXT_WRITE_NOSNP_FULL",
+	"TGOx_L2_EXT_WRITE_NOSNP_PTL",
+	"TGOx_L2_EXT_WRITE_SNP_FULL",
+	"TGOx_L2_EXT_WRITE_SNP_PTL",
+	"TGOx_L2_EXT_WRITE_BEATS",
+	"TGOx_L2_EXT_W_STALL",
+	"TGOx_L2_EXT_AW_CNT_Q1",
+	"TGOx_L2_EXT_AW_CNT_Q2",
+	"TGOx_L2_EXT_AW_CNT_Q3",
+	"TGOx_L2_EXT_SNOOP",
+	"TGOx_L2_EXT_SNOOP_STALL",
+	"TGOx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TGOx_L2_EXT_SNOOP_RESP_DATA",
+	"TGOx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TGOX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
index af00a6a..e24e91a 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h
index 1c1f669..73db45c 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -222,9 +222,9 @@
 	"TKAx_BEATS_RD_TEX",
 	"TKAx_BEATS_RD_TEX_EXT",
 	"TKAx_BEATS_RD_OTHER",
-	"TKAx_BEATS_WR_LSC_WB",
-	"TKAx_BEATS_WR_TIB",
 	"TKAx_BEATS_WR_LSC_OTHER",
+	"TKAx_BEATS_WR_TIB",
+	"TKAx_BEATS_WR_LSC_WB",
 
 	/* Performance counters for the Memory System */
 	"",
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
index 233ffbe..63eac50 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h
index fbb5080..932663c 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -222,9 +222,9 @@
 	"TNOx_BEATS_RD_TEX",
 	"TNOx_BEATS_RD_TEX_EXT",
 	"TNOx_BEATS_RD_OTHER",
-	"TNOx_BEATS_WR_LSC_WB",
-	"TNOx_BEATS_WR_TIB",
 	"TNOx_BEATS_WR_LSC_OTHER",
+	"TNOx_BEATS_WR_TIB",
+	"TNOx_BEATS_WR_LSC_WB",
 
 	/* Performance counters for the Memory System */
 	"",
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h
index 552db57..b8dde32b 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -222,9 +222,9 @@
 	"TSIx_BEATS_RD_TEX",
 	"TSIx_BEATS_RD_TEX_EXT",
 	"TSIx_BEATS_RD_OTHER",
-	"TSIx_BEATS_WR_LSC",
+	"TSIx_BEATS_WR_LSC_OTHER",
 	"TSIx_BEATS_WR_TIB",
-	"",
+	"TSIx_BEATS_WR_LSC_WB",
 
 	/* Performance counters for the Memory System */
 	"",
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h
index d1bb02a..c1e315b 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -116,10 +116,10 @@
 	"",
 	"TTRx_BUS_WRITE",
 	"TTRx_LOADING_DESC",
-	"",
-	"",
-	"",
-	"",
+	"TTRx_IDVS_POS_SHAD_REQ",
+	"TTRx_IDVS_POS_SHAD_WAIT",
+	"TTRx_IDVS_POS_SHAD_STALL",
+	"TTRx_IDVS_POS_FIFO_FULL",
 	"TTRx_PREFETCH_STALL",
 	"TTRx_VCACHE_HIT",
 	"TTRx_VCACHE_MISS",
@@ -129,11 +129,11 @@
 	"TTRx_VFETCH_STALL",
 	"TTRx_PRIMASSY_STALL",
 	"TTRx_BBOX_GEN_STALL",
-	"",
-	"",
-	"",
-	"",
-	"",
+	"TTRx_IDVS_VBU_HIT",
+	"TTRx_IDVS_VBU_MISS",
+	"TTRx_IDVS_VBU_LINE_DEALLOCATE",
+	"TTRx_IDVS_VAR_SHAD_REQ",
+	"TTRx_IDVS_VAR_SHAD_STALL",
 	"TTRx_BINNER_STALL",
 	"TTRx_ITER_STALL",
 	"TTRx_COMPRESS_MISS",
@@ -178,33 +178,33 @@
 	"TTRx_FRAG_QUADS_EZS_KILL",
 	"TTRx_FRAG_LZS_TEST",
 	"TTRx_FRAG_LZS_KILL",
-	"",
+	"TTRx_WARP_REG_SIZE_64",
 	"TTRx_FRAG_PTILES",
 	"TTRx_FRAG_TRANS_ELIM",
 	"TTRx_QUAD_FPK_KILLER",
-	"",
+	"TTRx_FULL_QUAD_WARPS",
 	"TTRx_COMPUTE_ACTIVE",
 	"TTRx_COMPUTE_TASKS",
 	"TTRx_COMPUTE_WARPS",
 	"TTRx_COMPUTE_STARVING",
 	"TTRx_EXEC_CORE_ACTIVE",
-	"TTRx_EXEC_ACTIVE",
-	"TTRx_EXEC_INSTR_COUNT",
+	"TTRx_EXEC_INSTR_FMA",
+	"TTRx_EXEC_INSTR_CVT",
+	"TTRx_EXEC_INSTR_SFU",
+	"TTRx_EXEC_INSTR_MSG",
 	"TTRx_EXEC_INSTR_DIVERGED",
-	"TTRx_EXEC_INSTR_STARVING",
-	"TTRx_ARITH_INSTR_SINGLE_FMA",
-	"TTRx_ARITH_INSTR_DOUBLE",
-	"TTRx_ARITH_INSTR_MSG",
-	"TTRx_ARITH_INSTR_MSG_ONLY",
-	"TTRx_TEX_INSTR",
-	"TTRx_TEX_INSTR_MIPMAP",
-	"TTRx_TEX_INSTR_COMPRESSED",
-	"TTRx_TEX_INSTR_3D",
-	"TTRx_TEX_INSTR_TRILINEAR",
-	"TTRx_TEX_COORD_ISSUE",
-	"TTRx_TEX_COORD_STALL",
-	"TTRx_TEX_STARVE_CACHE",
-	"TTRx_TEX_STARVE_FILTER",
+	"TTRx_EXEC_ICACHE_MISS",
+	"TTRx_EXEC_STARVE_ARITH",
+	"TTRx_CALL_BLEND_SHADER",
+	"TTRx_TEX_MSGI_NUM_QUADS",
+	"TTRx_TEX_DFCH_NUM_PASSES",
+	"TTRx_TEX_DFCH_NUM_PASSES_MISS",
+	"TTRx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TTRx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TTRx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TTRx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TTRx_TEX_TFCH_NUM_OPERATIONS",
+	"TTRx_TEX_FILT_NUM_OPERATIONS",
 	"TTRx_LS_MEM_READ_FULL",
 	"TTRx_LS_MEM_READ_SHORT",
 	"TTRx_LS_MEM_WRITE_FULL",
@@ -222,9 +222,9 @@
 	"TTRx_BEATS_RD_TEX",
 	"TTRx_BEATS_RD_TEX_EXT",
 	"TTRx_BEATS_RD_OTHER",
-	"TTRx_BEATS_WR_LSC",
+	"TTRx_BEATS_WR_LSC_OTHER",
 	"TTRx_BEATS_WR_TIB",
-	"",
+	"TTRx_BEATS_WR_LSC_WB",
 
 	/* Performance counters for the Memory System */
 	"",
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
index 4052e2f..218e63a 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,20 +27,20 @@
 #define GPU_ID_VERSION_MINOR_SHIFT        4
 #define GPU_ID_VERSION_MAJOR_SHIFT        12
 #define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
-#define GPU_ID_VERSION_STATUS             (0xF  << GPU_ID_VERSION_STATUS_SHIFT)
-#define GPU_ID_VERSION_MINOR              (0xFF << GPU_ID_VERSION_MINOR_SHIFT)
-#define GPU_ID_VERSION_MAJOR              (0xF  << GPU_ID_VERSION_MAJOR_SHIFT)
-#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+#define GPU_ID_VERSION_STATUS             (0xFu  << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR              (0xFFu << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR              (0xFu  << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
 
 /* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
-#define GPU_ID_PI_T60X                    0x6956
-#define GPU_ID_PI_T62X                    0x0620
-#define GPU_ID_PI_T76X                    0x0750
-#define GPU_ID_PI_T72X                    0x0720
-#define GPU_ID_PI_TFRX                    0x0880
-#define GPU_ID_PI_T86X                    0x0860
-#define GPU_ID_PI_T82X                    0x0820
-#define GPU_ID_PI_T83X                    0x0830
+#define GPU_ID_PI_T60X                    0x6956u
+#define GPU_ID_PI_T62X                    0x0620u
+#define GPU_ID_PI_T76X                    0x0750u
+#define GPU_ID_PI_T72X                    0x0720u
+#define GPU_ID_PI_TFRX                    0x0880u
+#define GPU_ID_PI_T86X                    0x0860u
+#define GPU_ID_PI_T82X                    0x0820u
+#define GPU_ID_PI_T83X                    0x0830u
 
 /* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
 #define GPU_ID_PI_NEW_FORMAT_START        0x1000
@@ -55,13 +55,13 @@
 #define GPU_ID2_ARCH_REV_SHIFT            20
 #define GPU_ID2_ARCH_MINOR_SHIFT          24
 #define GPU_ID2_ARCH_MAJOR_SHIFT          28
-#define GPU_ID2_VERSION_STATUS            (0xF << GPU_ID2_VERSION_STATUS_SHIFT)
-#define GPU_ID2_VERSION_MINOR             (0xFF << GPU_ID2_VERSION_MINOR_SHIFT)
-#define GPU_ID2_VERSION_MAJOR             (0xF << GPU_ID2_VERSION_MAJOR_SHIFT)
-#define GPU_ID2_PRODUCT_MAJOR             (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT)
-#define GPU_ID2_ARCH_REV                  (0xF << GPU_ID2_ARCH_REV_SHIFT)
-#define GPU_ID2_ARCH_MINOR                (0xF << GPU_ID2_ARCH_MINOR_SHIFT)
-#define GPU_ID2_ARCH_MAJOR                (0xF << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_VERSION_STATUS            (0xFu << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR             (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR             (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR             (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV                  (0xFu << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR                (0xFu << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR                (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT)
 #define GPU_ID2_PRODUCT_MODEL  (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
 #define GPU_ID2_VERSION        (GPU_ID2_VERSION_MAJOR | \
 								GPU_ID2_VERSION_MINOR | \
@@ -70,17 +70,17 @@
 /* Helper macro to create a partial GPU_ID (new format) that defines
    a product ignoring its version. */
 #define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
-		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
-		 ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
-		 ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
-		 ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+		((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		 (((u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+		 (((u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+		 (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
 
 /* Helper macro to create a partial GPU_ID (new format) that specifies the
    revision (major, minor, status) of a product */
 #define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
-		(((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
-		 ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
-		 ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+		((((u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+		 (((u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+		 (((u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
 
 /* Helper macro to create a complete GPU_ID (new format) */
 #define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
@@ -93,25 +93,25 @@
 /* Helper macro to create a partial GPU_ID (new format) that identifies
    a particular GPU model by its arch_major and product_major. */
 #define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
-		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
-		((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+		((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		(((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
 
 /* Strip off the non-relevant bits from a product_id value and make it suitable
    for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
    model. */
 #define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
-		(((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+		((((u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
 		    GPU_ID2_PRODUCT_MODEL)
 
-#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6u, 0)
-#define GPU_ID2_PRODUCT_THEX              GPU_ID2_MODEL_MAKE(6u, 1)
-#define GPU_ID2_PRODUCT_TSIX              GPU_ID2_MODEL_MAKE(7u, 0)
-#define GPU_ID2_PRODUCT_TDVX              GPU_ID2_MODEL_MAKE(7u, 3)
-#define GPU_ID2_PRODUCT_TNOX              GPU_ID2_MODEL_MAKE(7u, 1)
-#define GPU_ID2_PRODUCT_TGOX              GPU_ID2_MODEL_MAKE(7u, 2)
-#define GPU_ID2_PRODUCT_TKAX              GPU_ID2_MODEL_MAKE(8u, 0)
-#define GPU_ID2_PRODUCT_TTRX              GPU_ID2_MODEL_MAKE(8u, 1)
-#define GPU_ID2_PRODUCT_TBOX              GPU_ID2_MODEL_MAKE(8u, 2)
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
+#define GPU_ID2_PRODUCT_THEX              GPU_ID2_MODEL_MAKE(6, 1)
+#define GPU_ID2_PRODUCT_TSIX              GPU_ID2_MODEL_MAKE(7, 0)
+#define GPU_ID2_PRODUCT_TDVX              GPU_ID2_MODEL_MAKE(7, 3)
+#define GPU_ID2_PRODUCT_TNOX              GPU_ID2_MODEL_MAKE(7, 1)
+#define GPU_ID2_PRODUCT_TGOX              GPU_ID2_MODEL_MAKE(7, 2)
+#define GPU_ID2_PRODUCT_TKAX              GPU_ID2_MODEL_MAKE(8, 0)
+#define GPU_ID2_PRODUCT_TTRX              GPU_ID2_MODEL_MAKE(9, 0)
+#define GPU_ID2_PRODUCT_TBOX              GPU_ID2_MODEL_MAKE(8, 2)
 
 /* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
 #define GPU_ID_S_15DEV0                   0x1
@@ -120,9 +120,9 @@
 /* Helper macro to create a GPU_ID assuming valid values for id, major,
    minor, status */
 #define GPU_ID_MAKE(id, major, minor, status) \
-		(((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
-		((major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
-		((minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
-		((status) << GPU_ID_VERSION_STATUS_SHIFT))
+		((((u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+		(((u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+		(((u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+		(((u32)status) << GPU_ID_VERSION_STATUS_SHIFT))
 
 #endif /* _KBASE_GPU_ID_H_ */
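
The `u` suffixes and u32 casts added above keep these masks and shifted values
unsigned: evaluated as plain int, expressions such as 0xFFFF << 16 or
0xF << 28 shift into the sign bit, which is undefined behaviour in C. A hedged
sketch of how the model-matching helpers are typically combined (the function
and parameter names are illustrative, and the T60X legacy-ID special case
noted above is ignored):

/* Illustrative sketch, not driver code. */
static bool gpu_is_tgox(u32 gpu_id)
{
	u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
			GPU_ID_VERSION_PRODUCT_ID_SHIFT;

	/* Only IDs in the new (>= GPU_ID_PI_NEW_FORMAT_START) format carry
	 * the arch/product fields compared below.
	 */
	if (product_id < GPU_ID_PI_NEW_FORMAT_START)
		return false;

	return GPU_ID2_MODEL_MATCH_VALUE(product_id) == GPU_ID2_PRODUCT_TGOX;
}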
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
index 2fd0332..514b065 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -70,7 +70,7 @@
  */
 static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
 {
-	return single_open(file, kbasep_gpu_memory_seq_show , NULL);
+	return single_open(file, kbasep_gpu_memory_seq_show, NULL);
 }
 
 static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
index 9a9ce2d9..62ba105c 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -138,7 +138,7 @@
 	gpu_props->raw_props.mem_features = regdump.mem_features;
 	gpu_props->raw_props.mmu_features = regdump.mmu_features;
 	gpu_props->raw_props.l2_features = regdump.l2_features;
-	gpu_props->raw_props.suspend_size = regdump.suspend_size;
+	gpu_props->raw_props.core_features = regdump.core_features;
 
 	gpu_props->raw_props.as_present = regdump.as_present;
 	gpu_props->raw_props.js_present = regdump.js_present;
@@ -165,6 +165,7 @@
 	gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
 	gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
 	gpu_props->raw_props.thread_features = regdump.thread_features;
+	gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc;
 }
 
 void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props)
@@ -195,6 +196,8 @@
 	kbase_gpuprops_update_core_props_gpu_id(gpu_props);
 	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
 	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+	gpu_props->core_props.num_exec_engines =
+		KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4);
 
 	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
 		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
@@ -226,6 +229,13 @@
 	else
 		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
 
+	if (gpu_props->raw_props.thread_tls_alloc == 0)
+		gpu_props->thread_props.tls_alloc =
+				gpu_props->thread_props.max_threads;
+	else
+		gpu_props->thread_props.tls_alloc =
+				gpu_props->raw_props.thread_tls_alloc;
+
 	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
 	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
 	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
@@ -312,6 +322,7 @@
 	PROP(TEXTURE_FEATURES_2,          core_props.texture_features[2]),
 	PROP(TEXTURE_FEATURES_3,          core_props.texture_features[3]),
 	PROP(GPU_AVAILABLE_MEMORY_SIZE,   core_props.gpu_available_memory_size),
+	PROP(NUM_EXEC_ENGINES,            core_props.num_exec_engines),
 
 	PROP(L2_LOG2_LINE_SIZE,           l2_props.log2_line_size),
 	PROP(L2_LOG2_CACHE_SIZE,          l2_props.log2_cache_size),
@@ -327,13 +338,14 @@
 	PROP(MAX_TASK_QUEUE,              thread_props.max_task_queue),
 	PROP(MAX_THREAD_GROUP_SPLIT,      thread_props.max_thread_group_split),
 	PROP(IMPL_TECH,                   thread_props.impl_tech),
+	PROP(TLS_ALLOC,                   thread_props.tls_alloc),
 
 	PROP(RAW_SHADER_PRESENT,          raw_props.shader_present),
 	PROP(RAW_TILER_PRESENT,           raw_props.tiler_present),
 	PROP(RAW_L2_PRESENT,              raw_props.l2_present),
 	PROP(RAW_STACK_PRESENT,           raw_props.stack_present),
 	PROP(RAW_L2_FEATURES,             raw_props.l2_features),
-	PROP(RAW_SUSPEND_SIZE,            raw_props.suspend_size),
+	PROP(RAW_CORE_FEATURES,           raw_props.core_features),
 	PROP(RAW_MEM_FEATURES,            raw_props.mem_features),
 	PROP(RAW_MMU_FEATURES,            raw_props.mmu_features),
 	PROP(RAW_AS_PRESENT,              raw_props.as_present),
@@ -365,6 +377,7 @@
 			raw_props.thread_max_workgroup_size),
 	PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size),
 	PROP(RAW_THREAD_FEATURES,         raw_props.thread_features),
+	PROP(RAW_THREAD_TLS_ALLOC,        raw_props.thread_tls_alloc),
 	PROP(RAW_COHERENCY_MODE,          raw_props.coherency_mode),
 
 	PROP(COHERENCY_NUM_GROUPS,        coherency_info.num_groups),
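
For reference, KBASE_UBFX32() used above is the driver's unsigned bit-field
extract helper (defined elsewhere in the tree); functionally it behaves
roughly like the sketch below, which is how num_exec_engines is pulled out of
bits [3:0] of CORE_FEATURES. The helper name ubfx32_sketch() is illustrative
only:

/* Roughly equivalent to KBASE_UBFX32(value, offset, size) for size < 32:
 * extract 'size' bits of 'value' starting at bit 'offset', zero-extended.
 */
static inline u32 ubfx32_sketch(u32 value, unsigned int offset,
				unsigned int size)
{
	return (value >> offset) & ((1u << size) - 1u);
}

/* usage: num_exec_engines = ubfx32_sketch(core_features, 0, 4); */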
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
index a3ddec79..d7877d1 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -38,7 +38,7 @@
 struct kbase_gpuprops_regdump {
 	u32 gpu_id;
 	u32 l2_features;
-	u32 suspend_size; /* API 8.2+ */
+	u32 core_features;
 	u32 tiler_features;
 	u32 mem_features;
 	u32 mmu_features;
@@ -48,6 +48,7 @@
 	u32 thread_max_workgroup_size;
 	u32 thread_max_barrier_size;
 	u32 thread_features;
+	u32 thread_tls_alloc;
 	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
 	u32 js_features[GPU_MAX_JOB_SLOTS];
 	u32 shader_present_lo;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gwt.c b/drivers/gpu/arm/midgard/mali_kbase_gwt.c
index 2caab87..0481f80 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gwt.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_gwt.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -55,8 +55,6 @@
 	kbase_gpu_gwt_setup_page_permission(kctx, flag,
 				rb_first(&(kctx->reg_rbtree_same)));
 	kbase_gpu_gwt_setup_page_permission(kctx, flag,
-				rb_first(&(kctx->reg_rbtree_exec)));
-	kbase_gpu_gwt_setup_page_permission(kctx, flag,
 				rb_first(&(kctx->reg_rbtree_custom)));
 }
 
@@ -124,7 +122,8 @@
 }
 
 
-int list_cmp_function(void *priv, struct list_head *a, struct list_head *b)
+static int list_cmp_function(void *priv, struct list_head *a,
+				struct list_head *b)
 {
 	struct kbasep_gwt_list_element *elementA = container_of(a,
 				struct kbasep_gwt_list_element, link);
@@ -133,30 +132,27 @@
 
 	CSTD_UNUSED(priv);
 
-	if (elementA->handle > elementB->handle)
+	if (elementA->page_addr > elementB->page_addr)
 		return 1;
-	else if ((elementA->handle == elementB->handle) &&
-			(elementA->offset > elementB->offset))
-		return 1;
-	else
-		return -1;
+	return -1;
 }
 
-void kbase_gpu_gwt_collate(struct kbase_context *kctx,
+static void kbase_gpu_gwt_collate(struct kbase_context *kctx,
 		struct list_head *snapshot_list)
 {
 	struct kbasep_gwt_list_element *pos, *n;
 	struct kbasep_gwt_list_element *collated = NULL;
 
-	/* sort the list */
+	/* Sort the list */
 	list_sort(NULL, snapshot_list, list_cmp_function);
 
-	/* Combine contiguous areas from same region */
+	/* Combine contiguous areas. */
 	list_for_each_entry_safe(pos, n, snapshot_list, link) {
-		if (NULL == collated ||
-				collated->handle != pos->handle ||
-				collated->offset + collated->num_pages !=
-						pos->offset) {
+		if (collated == NULL ||	collated->region !=
+					pos->region ||
+					(collated->page_addr +
+					(collated->num_pages * PAGE_SIZE)) !=
+					pos->page_addr) {
 			/* This is the first time through, a new region or
 			 * is not contiguous - start collating to this element
 			 */
@@ -176,10 +172,8 @@
 {
 	const u32 ubuf_size = gwt_dump->in.len;
 	u32 ubuf_count = 0;
-	__user void *user_handles = (__user void *)
-			(uintptr_t)gwt_dump->in.handle_buffer;
-	__user void *user_offsets = (__user void *)
-			(uintptr_t)gwt_dump->in.offset_buffer;
+	__user void *user_addr = (__user void *)
+			(uintptr_t)gwt_dump->in.addr_buffer;
 	__user void *user_sizes = (__user void *)
 			(uintptr_t)gwt_dump->in.size_buffer;
 
@@ -191,8 +185,7 @@
 		return -EPERM;
 	}
 
-	if (!gwt_dump->in.len || !gwt_dump->in.handle_buffer
-			|| !gwt_dump->in.offset_buffer
+	if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer
 			|| !gwt_dump->in.size_buffer) {
 		kbase_gpu_vm_unlock(kctx);
 		/* We don't have any valid user space buffer to copy the
@@ -219,8 +212,7 @@
 	}
 
 	while ((!list_empty(&kctx->gwt_snapshot_list))) {
-		u64 handle_buffer[32];
-		u64 offset_buffer[32];
+		u64 addr_buffer[32];
 		u64 num_page_buffer[32];
 		u32 count = 0;
 		int err;
@@ -228,30 +220,20 @@
 
 		list_for_each_entry_safe(dump_info, n,
 				&kctx->gwt_snapshot_list, link) {
-			handle_buffer[count] = dump_info->handle;
-			offset_buffer[count] = dump_info->offset;
+			addr_buffer[count] = dump_info->page_addr;
 			num_page_buffer[count] = dump_info->num_pages;
 			count++;
 			list_del(&dump_info->link);
 			kfree(dump_info);
-			if (ARRAY_SIZE(handle_buffer) == count ||
+			if (ARRAY_SIZE(addr_buffer) == count ||
 					ubuf_size == (ubuf_count + count))
 				break;
 		}
 
 		if (count) {
-			err = copy_to_user((user_handles +
-						(ubuf_count * sizeof(u64))),
-					(void *)handle_buffer,
-					count * sizeof(u64));
-			if (err) {
-				dev_err(kctx->kbdev->dev, "Copy to user failure\n");
-				kbase_gpu_vm_unlock(kctx);
-				return err;
-			}
-			err = copy_to_user((user_offsets +
-						(ubuf_count * sizeof(u64))),
-					(void *)offset_buffer,
+			err = copy_to_user((user_addr +
+					(ubuf_count * sizeof(u64))),
+					(void *)addr_buffer,
 					count * sizeof(u64));
 			if (err) {
 				dev_err(kctx->kbdev->dev, "Copy to user failure\n");
@@ -259,7 +241,7 @@
 				return err;
 			}
 			err = copy_to_user((user_sizes +
-						(ubuf_count * sizeof(u64))),
+					(ubuf_count * sizeof(u64))),
 					(void *)num_page_buffer,
 					count * sizeof(u64));
 			if (err) {
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c
index 286cc954..f34f53a 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hw.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -176,6 +176,7 @@
 
 		{GPU_ID2_PRODUCT_TGOX,
 		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0},
 		  {U32_MAX, NULL} } },
 
 		{GPU_ID2_PRODUCT_TKAX,
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
index dd25746..124a2d9 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -31,9 +31,19 @@
 
 #include <mali_kbase_jm_defs.h>
 
-/* The hwaccess_lock (a spinlock) must be held when accessing this structure */
+/**
+ * struct kbase_hwaccess_data - object encapsulating the GPU backend specific
+ *                              data for the HW access layer.
+ *                              hwaccess_lock (a spinlock) must be held when
+ *                              accessing this structure.
+ * @active_kctx:     Pointer, per job slot, to the active kbase context which
+ *                   last submitted an atom to the GPU. While a context is
+ *                   active it can also submit new atoms to the GPU from IRQ
+ *                   context, without going through the bottom half of the
+ *                   job completion path.
+ * @backend:         GPU backend specific data for HW access layer
+ */
 struct kbase_hwaccess_data {
-	struct kbase_context *active_kctx;
+	struct kbase_context *active_kctx[BASE_JM_MAX_NR_SLOTS];
 
 	struct kbase_backend_data backend;
 };
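
With @active_kctx now tracked per job slot, code that previously compared
against a single active context has to scan all slots instead. A hedged
sketch (assuming kbdev->hwaccess is the kbase_hwaccess_data instance and that
hwaccess_lock is already held by the caller; the helper name is illustrative):

/* Illustrative sketch, not driver code. */
static bool kctx_is_active_sketch(struct kbase_device *kbdev,
				  struct kbase_context *kctx)
{
	int js;

	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++)
		if (kbdev->hwaccess.active_kctx[js] == kctx)
			return true;

	return false;
}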
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
index b8ab0dc..63844d9 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -33,6 +33,8 @@
  *				  GPU
  * @kbdev:	Device pointer
  * @regdump:	Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * The caller should ensure that the GPU remains powered on during this call.
  */
 void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
 					struct kbase_gpuprops_regdump *regdump);
@@ -43,7 +45,7 @@
  * @regdump: Pointer to struct kbase_gpuprops_regdump structure
  *
  * This function reads GPU properties that are dependent on the hardware
- * features bitmask
+ * features bitmask. It will power on the GPU if required.
  */
 void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
 					struct kbase_gpuprops_regdump *regdump);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
index d180e39..0c5ceff 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -35,15 +35,15 @@
  * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
  * @kbdev:	Kbase device
  * @kctx:	Kbase context
- * @setup:	HW counter setup parameters
+ * @enable:	HW counter setup parameters
  *
  * Context: might sleep, waiting for reset to complete
  *
  * Return: 0 on success
  */
 int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
-					struct kbase_context *kctx,
-					struct kbase_uk_hwcnt_setup *setup);
+				struct kbase_context *kctx,
+				struct kbase_ioctl_hwcnt_enable *enable);
 
 /**
  * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
index 8b3d7e20..580ac987 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -86,6 +86,7 @@
  * kbase_backend_use_ctx_sched() - Activate a context.
  * @kbdev:	Device pointer
  * @kctx:	Context pointer
+ * @js:         Job slot to activate context on
  *
  * kbase_gpu_next_job() will pull atoms from the active context.
  *
@@ -99,7 +100,7 @@
  *	   not have an address space assigned)
  */
 bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
-					struct kbase_context *kctx);
+					struct kbase_context *kctx, int js);
 
 /**
  * kbase_backend_release_ctx_irq - Release a context from the GPU. This will
@@ -159,14 +160,13 @@
  *                                        any scheduling has taken place.
  * @kbdev:         Device pointer
  * @core_req:      Core requirements of atom
- * @affinity:      Affinity of atom
  * @coreref_state: Coreref state of atom
  *
  * This function should only be called from kbase_jd_done_worker() or
  * js_return_worker().
  */
 void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
-		base_jd_core_req core_req, u64 affinity,
+		base_jd_core_req core_req,
 		enum kbase_atom_coreref_state coreref_state);
 
 /**
@@ -178,17 +178,6 @@
 void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
 
 /**
- * kbase_backend_inspect_head() - Return the atom currently at the head of slot
- *				  @js
- * @kbdev:	Device pointer
- * @js:		Job slot to inspect
- *
- * Return : Atom currently at the head of slot @js, or NULL
- */
-struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
-					int js);
-
-/**
  * kbase_backend_inspect_tail - Return the atom currently at the tail of slot
  *                              @js
  * @kbdev: Device pointer
@@ -381,6 +370,9 @@
 void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
 				struct kbase_jd_atom *target_katom);
 
+/* Object containing callbacks for enabling/disabling protected mode, used
+ * on GPUs which support protected mode switching natively.
+ */
 extern struct protected_mode_ops kbase_native_protected_ops;
 
 #endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
index 7f64936..9b86b510 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -41,7 +41,7 @@
 
 /**
  * kbase_wait_write_flush() -  Wait for GPU write flush
- * @kctx:	Context pointer
+ * @kbdev:	Kbase device
  *
  * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
  * its write buffer.
@@ -52,7 +52,7 @@
  * This function is only in use for BASE_HW_ISSUE_6367
  */
 #ifndef CONFIG_MALI_NO_MALI
-void kbase_wait_write_flush(struct kbase_context *kctx);
+void kbase_wait_write_flush(struct kbase_device *kbdev);
 #endif
 
 #endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_ioctl.h b/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
index a8fe9cd..bee2f3a1 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,6 +27,7 @@
 extern "C" {
 #endif
 
+#include <asm-generic/ioctl.h>
 #include <linux/types.h>
 
 #define KBASE_IOCTL_TYPE 0x80
@@ -43,20 +44,27 @@
  *   KBASE_IOCTL_STICKY_RESOURCE_UNMAP
  * 11.4:
  * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET
+ * 11.5:
+ * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD)
+ * 11.6:
+ * - Added flags field to base_jit_alloc_info structure, which can be used to
+ *   specify pseudo chunked tiler alignment for JIT allocations.
+ * 11.7:
+ * - Removed UMP support
+ * 11.8:
+ * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags
+ * 11.9:
+ * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY
+ *   under base_mem_alloc_flags
+ * 11.10:
+ * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for
+ *   JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations
+ *   with one softjob.
+ * 11.11:
+ * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags
  */
 #define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 4
-
-#ifdef ANDROID
-/* Android's definition of ioctl is incorrect, specifying the type argument as
- * 'int'. This creates a warning when using _IOWR (as the top bit is set). Work
- * round this by redefining _IOC to include a case to 'int'.
- */
-#undef _IOC
-#define _IOC(dir, type, nr, size) \
-	((int)(((dir) << _IOC_DIRSHIFT) | ((type) << _IOC_TYPESHIFT) | \
-	((nr) << _IOC_NRSHIFT) | ((size) << _IOC_SIZESHIFT)))
-#endif
+#define BASE_UK_VERSION_MINOR 11
 
 /**
  * struct kbase_ioctl_version_check - Check version compatibility with kernel
@@ -191,9 +199,9 @@
 #define KBASE_IOCTL_MEM_QUERY \
 	_IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query)
 
-#define KBASE_MEM_QUERY_COMMIT_SIZE	1
-#define KBASE_MEM_QUERY_VA_SIZE		2
-#define KBASE_MEM_QUERY_FLAGS		3
+#define KBASE_MEM_QUERY_COMMIT_SIZE	((u64)1)
+#define KBASE_MEM_QUERY_VA_SIZE		((u64)2)
+#define KBASE_MEM_QUERY_FLAGS		((u64)3)
 
 /**
  * struct kbase_ioctl_mem_free - Free a memory region
@@ -253,6 +261,21 @@
 	_IO(KBASE_IOCTL_TYPE, 11)
 
 /**
+ * struct kbase_ioctl_hwcnt_values - Values to set the dummy counters to.
+ * @data:    Counter samples for the dummy model.
+ * @size:    Size of the counter sample data.
+ * @padding: Padding.
+ */
+struct kbase_ioctl_hwcnt_values {
+	__u64 data;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_HWCNT_SET \
+	_IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values)
+
+/**
  * struct kbase_ioctl_disjoint_query - Query the disjoint counter
  * @counter:   A counter of disjoint events in the kernel
  */
@@ -271,6 +294,10 @@
  *
  * The ioctl will return the number of bytes written into version_buffer
  * (which includes a NULL byte) or a negative error code
+ *
+ * The ioctl request code has to be _IOW because the data in the ioctl struct
+ * is copied to the kernel, even though the kernel then writes out the
+ * version info to the buffer specified in the ioctl.
  */
 struct kbase_ioctl_get_ddk_version {
 	__u64 version_buffer;
@@ -282,15 +309,39 @@
 	_IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version)
 
 /**
+ * struct kbase_ioctl_mem_jit_init_old - Initialise the JIT memory allocator
+ *
+ * @va_pages: Number of VA pages to reserve for JIT
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ *
+ * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for
+ * backwards compatibility.
+ */
+struct kbase_ioctl_mem_jit_init_old {
+	__u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT_OLD \
+	_IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_old)
+
+/**
  * struct kbase_ioctl_mem_jit_init - Initialise the JIT memory allocator
  *
  * @va_pages: Number of VA pages to reserve for JIT
+ * @max_allocations: Maximum number of concurrent allocations
+ * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
+ * @padding: Currently unused, must be zero
  *
  * Note that depending on the VA size of the application and GPU, the value
  * specified in @va_pages may be ignored.
  */
 struct kbase_ioctl_mem_jit_init {
 	__u64 va_pages;
+	__u8 max_allocations;
+	__u8 trim_level;
+	__u8 padding[6];
 };
 
 #define KBASE_IOCTL_MEM_JIT_INIT \
@@ -595,7 +646,6 @@
 #define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
 	_IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset)
 
-/* IOCTL 32 is free for use */
 
 #define KBASE_IOCTL_CINSTR_GWT_START \
 	_IO(KBASE_IOCTL_TYPE, 33)
@@ -605,9 +655,7 @@
 
 /**
  * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses.
- * @handle_buffer: Address of buffer to hold handles of modified areas.
- * @offset_buffer: Address of buffer to hold offset size of modified areas
- *                 (in pages)
+ * @addr_buffer: Address of buffer to hold addresses of GPU-modified areas.
  * @size_buffer: Address of buffer to hold size of modified areas (in pages)
  * @len: Number of addresses the buffers can hold.
  * @more_data_available: Status indicating if more addresses are available.
@@ -615,13 +663,13 @@
  *
  * @in: Input parameters
  * @out: Output parameters
+ *
  * This structure is used when performing a call to dump GPU write fault
  * addresses.
  */
 union kbase_ioctl_cinstr_gwt_dump {
 	struct {
-		__u64 handle_buffer;
-		__u64 offset_buffer;
+		__u64 addr_buffer;
 		__u64 size_buffer;
 		__u32 len;
 		__u32 padding;
@@ -637,7 +685,6 @@
 #define KBASE_IOCTL_CINSTR_GWT_DUMP \
 	_IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
 
-/* IOCTLs 36-41 are reserved */
 
 /***************
  * test ioctls *
@@ -680,6 +727,37 @@
 #define KBASE_IOCTL_TLSTREAM_STATS \
 	_IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
 
+/**
+ * struct kbase_ioctl_cs_event_memory_write - Write an event memory address
+ * @cpu_addr: Memory address to write
+ * @value: Value to write
+ * @padding: Currently unused, must be zero
+ */
+struct kbase_ioctl_cs_event_memory_write {
+	__u64 cpu_addr;
+	__u8 value;
+	__u8 padding[7];
+};
+
+/**
+ * union kbase_ioctl_cs_event_memory_read - Read an event memory address
+ * @cpu_addr: Memory address to read
+ * @value: Value read
+ * @padding: Currently unused, must be zero
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_cs_event_memory_read {
+	struct {
+		__u64 cpu_addr;
+	} in;
+	struct {
+		__u8 value;
+		__u8 padding[7];
+	} out;
+};
+
 #endif
 
 /**********************************
@@ -723,7 +801,7 @@
 #define KBASE_GPUPROP_RAW_L2_PRESENT			27
 #define KBASE_GPUPROP_RAW_STACK_PRESENT			28
 #define KBASE_GPUPROP_RAW_L2_FEATURES			29
-#define KBASE_GPUPROP_RAW_SUSPEND_SIZE			30
+#define KBASE_GPUPROP_RAW_CORE_FEATURES			30
 #define KBASE_GPUPROP_RAW_MEM_FEATURES			31
 #define KBASE_GPUPROP_RAW_MMU_FEATURES			32
 #define KBASE_GPUPROP_RAW_AS_PRESENT			33
@@ -778,6 +856,11 @@
 #define KBASE_GPUPROP_TEXTURE_FEATURES_3		80
 #define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3		81
 
+#define KBASE_GPUPROP_NUM_EXEC_ENGINES                  82
+
+#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC		83
+#define KBASE_GPUPROP_TLS_ALLOC				84
+
 #ifdef __cpluscplus
 }
 #endif
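Illustrative note (not part of the patch): the extended kbase_ioctl_mem_jit_init structure above is driven from userspace through the usual ioctl() path. A minimal userspace sketch follows; the "/dev/mali0" device node, the include path, and the chosen field values are assumptions for illustration only:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "mali_kbase_ioctl.h"

static int example_jit_init(void)
{
	struct kbase_ioctl_mem_jit_init args;
	int ret;
	int fd = open("/dev/mali0", O_RDWR);	/* assumed device node */

	if (fd < 0)
		return -1;

	memset(&args, 0, sizeof(args));
	args.va_pages = 1024;		/* VA pages to reserve for JIT */
	args.max_allocations = 16;	/* concurrent allocations allowed */
	args.trim_level = 0;		/* no trimming on free */

	ret = ioctl(fd, KBASE_IOCTL_MEM_JIT_INIT, &args);
	close(fd);
	return ret;
}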
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c
index 91cfee7..7a862bcc 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_jd.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -91,6 +91,7 @@
 	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
 		/* Soft-job */
 		if (katom->will_fail_event_code) {
+			kbase_finish_soft_job(katom);
 			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
 			return 0;
 		}
@@ -200,7 +201,7 @@
 	struct kbase_dma_fence_resv_info info = {
 		.dma_fence_resv_count = 0,
 	};
-#ifdef CONFIG_SYNC
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
 	/*
 	 * When both dma-buf fence and Android native sync is enabled, we
 	 * disable dma-buf fence for contexts that are using Android native
@@ -208,9 +209,9 @@
 	 */
 	const bool implicit_sync = !kbase_ctx_flag(katom->kctx,
 						   KCTX_NO_IMPLICIT_SYNC);
-#else /* CONFIG_SYNC */
+#else /* CONFIG_SYNC || CONFIG_SYNC_FILE */
 	const bool implicit_sync = true;
-#endif /* CONFIG_SYNC */
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
 #endif /* CONFIG_MALI_DMA_FENCE */
 	struct base_external_resource *input_extres;
 
@@ -808,7 +809,6 @@
 	katom->nr_extres = user_atom->nr_extres;
 	katom->extres = NULL;
 	katom->device_nr = user_atom->device_nr;
-	katom->affinity = 0;
 	katom->jc = user_atom->jc;
 	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
 	katom->core_req = user_atom->core_req;
@@ -923,10 +923,35 @@
 
 	if (will_fail) {
 		if (!queued) {
+			if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+				/* This softjob has failed due to a previous
+				 * dependency; however, we should still run the
+				 * prepare & finish functions.
+				 */
+				int err = kbase_prepare_soft_job(katom);
+
+				if (err >= 0)
+					kbase_finish_soft_job(katom);
+			}
+
 			ret = jd_done_nolock(katom, NULL);
 
 			goto out;
 		} else {
+
+			if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+				/* This softjob has failed due to a previous
+				 * dependency; however, we should still run the
+				 * prepare & finish functions.
+				 */
+				if (kbase_prepare_soft_job(katom) != 0) {
+					katom->event_code =
+						BASE_JD_EVENT_JOB_INVALID;
+					ret = jd_done_nolock(katom, NULL);
+					goto out;
+				}
+			}
+
 			katom->will_fail_event_code = katom->event_code;
 			ret = false;
 
@@ -1003,11 +1028,13 @@
 		goto out;
 	}
 
-	/* Reject fence wait soft-job atoms accessing external resources */
+	/* Reject soft-job atoms of certain types from accessing external resources */
 	if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
-			 ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT)) {
+			(((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) ||
+			 ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC) ||
+			 ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_FREE))) {
 		dev_warn(kctx->kbdev->dev,
-				"Rejecting fence wait soft-job atom accessing external resources");
+				"Rejecting soft-job atom accessing external resources");
 		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
 		ret = jd_done_nolock(katom, NULL);
 		goto out;
@@ -1123,9 +1150,6 @@
 		return -EINVAL;
 	}
 
-	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(nr_atoms,
-				&kctx->timeline.jd_atoms_in_flight));
-
 	/* All atoms submitted in this call have the same flush ID */
 	latest_flush = kbase_backend_get_current_flush_id(kbdev);
 
@@ -1136,9 +1160,6 @@
 		if (copy_from_user(&user_atom, user_addr,
 					sizeof(user_atom)) != 0) {
 			err = -EINVAL;
-			KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx,
-				atomic_sub_return(nr_atoms - i,
-				&kctx->timeline.jd_atoms_in_flight));
 			break;
 		}
 
@@ -1222,7 +1243,6 @@
 	struct kbasep_js_atom_retained_state katom_retained_state;
 	bool context_idle;
 	base_jd_core_req core_req = katom->core_req;
-	u64 affinity = katom->affinity;
 	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
 
 	/* Soft jobs should never reach this function */
@@ -1270,7 +1290,8 @@
 		return;
 	}
 
-	if (katom->event_code != BASE_JD_EVENT_DONE)
+	if ((katom->event_code != BASE_JD_EVENT_DONE) &&
+			(!kbase_ctx_flag(katom->kctx, KCTX_DYING)))
 		dev_err(kbdev->dev,
 			"t6xx: GPU fault 0x%02lx from job slot %d\n",
 					(unsigned long)katom->event_code,
@@ -1368,8 +1389,7 @@
 		mutex_unlock(&jctx->lock);
 	}
 
-	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
-			coreref_state);
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, coreref_state);
 
 	if (context_idle)
 		kbase_pm_context_idle(kbdev);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.c b/drivers/gpu/arm/midgard/mali_kbase_jm.c
index 3b16e6f7..da78a167 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_jm.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -44,7 +44,7 @@
 	struct kbase_context *kctx;
 	int i;
 
-	kctx = kbdev->hwaccess.active_kctx;
+	kctx = kbdev->hwaccess.active_kctx[js];
 
 	if (!kctx)
 		return true;
@@ -106,10 +106,14 @@
 
 void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
 {
+	int js;
+
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	if (kbdev->hwaccess.active_kctx == kctx)
-		kbdev->hwaccess.active_kctx = NULL;
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		if (kbdev->hwaccess.active_kctx[js] == kctx)
+			kbdev->hwaccess.active_kctx[js] = NULL;
+	}
 }
 
 struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c
index 8f50b3c..66a8444 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -422,7 +422,7 @@
 int kbasep_js_devdata_init(struct kbase_device * const kbdev)
 {
 	struct kbasep_js_device_data *jsdd;
-	int i;
+	int i, j;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 
@@ -527,8 +527,10 @@
 	sema_init(&jsdd->schedule_sem, 1);
 
 	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
-		INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]);
-		INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]);
+		for (j = 0; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) {
+			INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]);
+			INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]);
+		}
 	}
 
 	return 0;
@@ -552,13 +554,13 @@
 	 */
 	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
 	KBASE_DEBUG_ASSERT(memcmp(
-	        js_devdata->runpool_irq.ctx_attr_ref_count,
-	        zero_ctx_attr_ref_count,
-	        sizeof(zero_ctx_attr_ref_count)) == 0);
+				  js_devdata->runpool_irq.ctx_attr_ref_count,
+				  zero_ctx_attr_ref_count,
+				  sizeof(zero_ctx_attr_ref_count)) == 0);
 	CSTD_UNUSED(zero_ctx_attr_ref_count);
 }
 
-int kbasep_js_kctx_init(struct kbase_context * const kctx)
+int kbasep_js_kctx_init(struct kbase_context *const kctx)
 {
 	struct kbase_device *kbdev;
 	struct kbasep_js_kctx_info *js_kctx_info;
@@ -606,6 +608,7 @@
 	struct kbasep_js_kctx_info *js_kctx_info;
 	int js;
 	bool update_ctx_count = false;
+	unsigned long flags;
 
 	KBASE_DEBUG_ASSERT(kctx != NULL);
 
@@ -621,8 +624,10 @@
 	mutex_lock(&kbdev->js_data.queue_mutex);
 	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
 
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
 		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) {
 		WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
@@ -666,7 +671,7 @@
 		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
 
 	list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
-					&kbdev->js_data.ctx_list_pullable[js]);
+			&kbdev->js_data.ctx_list_pullable[js][kctx->priority]);
 
 	if (!kctx->slots_pullable) {
 		kbdev->js_data.nr_contexts_pullable++;
@@ -706,7 +711,7 @@
 		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
 
 	list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
-					&kbdev->js_data.ctx_list_pullable[js]);
+			&kbdev->js_data.ctx_list_pullable[js][kctx->priority]);
 
 	if (!kctx->slots_pullable) {
 		kbdev->js_data.nr_contexts_pullable++;
@@ -777,7 +782,7 @@
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
 	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
-				&kbdev->js_data.ctx_list_unpullable[js]);
+		&kbdev->js_data.ctx_list_unpullable[js][kctx->priority]);
 
 	if (kctx->slots_pullable == (1 << js)) {
 		kbdev->js_data.nr_contexts_pullable--;
@@ -852,19 +857,23 @@
 						int js)
 {
 	struct kbase_context *kctx;
+	int i;
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	if (list_empty(&kbdev->js_data.ctx_list_pullable[js]))
-		return NULL;
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i]))
+			continue;
 
-	kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next,
-					struct kbase_context,
-					jctx.sched_info.ctx.ctx_list_entry[js]);
+		kctx = list_entry(kbdev->js_data.ctx_list_pullable[js][i].next,
+				struct kbase_context,
+				jctx.sched_info.ctx.ctx_list_entry[js]);
 
-	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
 
-	return kctx;
+		return kctx;
+	}
+	return NULL;
 }
 
 /**
@@ -1065,6 +1074,51 @@
 	return ret;
 }
 
+void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Move kctx to the pullable/unpullable list as per the new priority */
+	if (new_priority != kctx->priority) {
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kctx->slots_pullable & (1 << js))
+				list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js][new_priority]);
+			else
+				list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_unpullable[js][new_priority]);
+		}
+
+		kctx->priority = new_priority;
+	}
+}
+
+void kbase_js_update_ctx_priority(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int new_priority = KBASE_JS_ATOM_SCHED_PRIO_LOW;
+	int prio;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->js_ctx_scheduling_mode == KBASE_JS_SYSTEM_PRIORITY_MODE) {
+		/* Determine the new priority for the context, as per the
+		 * priority of the atoms currently in use.
+		 */
+		for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+			if (kctx->atoms_count[prio]) {
+				new_priority = prio;
+				break;
+			}
+		}
+	}
+
+	kbase_js_set_ctx_priority(kctx, new_priority);
+}
+
 bool kbasep_js_add_job(struct kbase_context *kctx,
 		struct kbase_jd_atom *atom)
 {
@@ -1099,6 +1153,9 @@
 	/* Lock for state available during IRQ */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
+	if (++kctx->atoms_count[atom->sched_priority] == 1)
+		kbase_js_update_ctx_priority(kctx);
+
 	if (!kbase_js_dep_validate(kctx, atom)) {
 		/* Dependencies could not be represented */
 		--(js_kctx_info->ctx.nr_jobs);
@@ -1107,6 +1164,19 @@
 		 * dependencies */
 		atom->status = KBASE_JD_ATOM_STATE_QUEUED;
 
+		/* Undo the count, as the atom will get added again later, but
+		 * leave the context priority adjusted or boosted, in case this
+		 * was the first higher priority atom received for this
+		 * context.
+		 * This prevents a priority inversion scenario where another
+		 * context, with only medium priority atoms, keeps getting
+		 * scheduled over this context, which has both lower and higher
+		 * priority atoms but whose higher priority atoms are blocked
+		 * by a dependency on lower priority atoms. With the priority
+		 * boost the high priority atom gets to run at the earliest.
+		 */
+		kctx->atoms_count[atom->sched_priority]--;
+
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		mutex_unlock(&js_devdata->runpool_mutex);
 
@@ -1114,7 +1184,6 @@
 	}
 
 	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_READY);
-	KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
 
 	enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
 
@@ -1134,7 +1203,8 @@
 	}
 	/* If this context is active and the atom is the first on its slot,
 	 * kick the job manager to attempt to fast-start the atom */
-	if (enqueue_required && kctx == kbdev->hwaccess.active_kctx)
+	if (enqueue_required && kctx ==
+			kbdev->hwaccess.active_kctx[atom->slot_nr])
 		kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
 
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -1173,6 +1243,7 @@
 		struct kbase_context *kctx, struct kbase_jd_atom *atom)
 {
 	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	KBASE_DEBUG_ASSERT(kctx != NULL);
@@ -1186,6 +1257,11 @@
 	/* De-refcount ctx.nr_jobs */
 	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
 	--(js_kctx_info->ctx.nr_jobs);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (--kctx->atoms_count[atom->sched_priority] == 0)
+		kbase_js_update_ctx_priority(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
@@ -1256,9 +1332,8 @@
 }
 
 /**
- * kbasep_js_release_result - Try running more jobs after releasing a context
- *                            and/or atom
- *
+ * kbasep_js_run_jobs_after_ctx_and_atom_release - Try running more jobs after
+ *                           releasing a context and/or atom
  * @kbdev:                   The kbase_device to operate on
  * @kctx:                    The kbase_context to operate on
  * @katom_retained_state:    Retained state from the atom
@@ -1304,12 +1379,15 @@
 	return result;
 }
 
-/*
- * Internal function to release the reference on a ctx and an atom's "retained
- * state", only taking the runpool and as transaction mutexes
+/**
+ * kbasep_js_runpool_release_ctx_internal - Internal function to release the reference
+ *                                          on a ctx and an atom's "retained state", only
+ *                                          taking the runpool and as transaction mutexes
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
  *
- * This also starts more jobs running in the case of an ctx-attribute state
- * change
+ * This also starts more jobs running in the case of a ctx-attribute state change
  *
  * This does none of the followup actions for scheduling:
  * - It does not schedule in a new context
@@ -1317,11 +1395,15 @@
  *
  * For those tasks, just call kbasep_js_runpool_release_ctx() instead
  *
- * Requires:
+ * Has the following requirements:
  * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
  * - Context has a non-zero refcount
  * - Caller holds js_kctx_info->ctx.jsctx_mutex
  * - Caller holds js_devdata->runpool_mutex
+ *
+ * Return: A bitpattern of KBASEP_JS_RELEASE_RESULT_* flags, indicating the
+ *         result of releasing the context and whether the caller should try
+ *         scheduling a new context or try scheduling all contexts.
  */
 static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 		struct kbase_device *kbdev,
@@ -1407,8 +1489,10 @@
 
 		kbase_backend_release_ctx_irq(kbdev, kctx);
 
-		if (kbdev->hwaccess.active_kctx == kctx)
-			kbdev->hwaccess.active_kctx = NULL;
+		for (slot = 0; slot < num_slots; slot++) {
+			if (kbdev->hwaccess.active_kctx[slot] == kctx)
+				kbdev->hwaccess.active_kctx[slot] = NULL;
+		}
 
 		/* Ctx Attribute handling
 		 *
@@ -1600,7 +1684,8 @@
 }
 
 static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
-					struct kbase_context *kctx)
+					struct kbase_context *kctx,
+					int js)
 {
 	struct kbasep_js_device_data *js_devdata;
 	struct kbasep_js_kctx_info *js_kctx_info;
@@ -1676,7 +1761,7 @@
 		return false;
 	}
 
-	kbdev->hwaccess.active_kctx = kctx;
+	kbdev->hwaccess.active_kctx[js] = kctx;
 
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 	kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
@@ -1709,6 +1794,8 @@
 		kctx_suspended = true;
 	}
 
+	kbase_ctx_flag_clear(kctx, KCTX_PULLED_SINCE_ACTIVE_JS0 << js);
+
 	/* Transaction complete */
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 	mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -1733,23 +1820,27 @@
 }
 
 static bool kbase_js_use_ctx(struct kbase_device *kbdev,
-				struct kbase_context *kctx)
+				struct kbase_context *kctx,
+				int js)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
-			kbase_backend_use_ctx_sched(kbdev, kctx)) {
+			kbase_backend_use_ctx_sched(kbdev, kctx, js)) {
 		/* Context already has ASID - mark as active */
-		kbdev->hwaccess.active_kctx = kctx;
+		if (kbdev->hwaccess.active_kctx[js] != kctx) {
+			kbdev->hwaccess.active_kctx[js] = kctx;
+			kbase_ctx_flag_clear(kctx,
+					KCTX_PULLED_SINCE_ACTIVE_JS0 << js);
+		}
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		return true; /* Context already scheduled */
 	}
 
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-	return kbasep_js_schedule_ctx(kbdev, kctx);
+	return kbasep_js_schedule_ctx(kbdev, kctx, js);
 }
 
 void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
@@ -1846,7 +1937,7 @@
 
 		retained = retained << 1;
 
-		if (kctx) {
+		if (kctx && !(kbdev->as_free & (1u << i))) {
 			kbase_ctx_sched_retain_ctx_refcount(kctx);
 			retained |= 1u;
 			/* We can only cope with up to 1 privileged context -
@@ -1880,7 +1971,7 @@
 void kbasep_js_resume(struct kbase_device *kbdev)
 {
 	struct kbasep_js_device_data *js_devdata;
-	int js;
+	int js, prio;
 
 	KBASE_DEBUG_ASSERT(kbdev);
 	js_devdata = &kbdev->js_data;
@@ -1888,31 +1979,43 @@
 
 	mutex_lock(&js_devdata->queue_mutex);
 	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
-		struct kbase_context *kctx, *n;
-
-		list_for_each_entry_safe(kctx, n,
-				&kbdev->js_data.ctx_list_unpullable[js],
-				jctx.sched_info.ctx.ctx_list_entry[js]) {
-			struct kbasep_js_kctx_info *js_kctx_info;
+		for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+			struct kbase_context *kctx, *n;
 			unsigned long flags;
-			bool timer_sync = false;
 
-			js_kctx_info = &kctx->jctx.sched_info;
-
-			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
-			mutex_lock(&js_devdata->runpool_mutex);
 			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
-			if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
-				kbase_js_ctx_pullable(kctx, js, false))
-				timer_sync =
-					kbase_js_ctx_list_add_pullable_nolock(
-							kbdev, kctx, js);
+			list_for_each_entry_safe(kctx, n,
+				 &kbdev->js_data.ctx_list_unpullable[js][prio],
+				 jctx.sched_info.ctx.ctx_list_entry[js]) {
+				struct kbasep_js_kctx_info *js_kctx_info;
+				bool timer_sync = false;
+
+				/* Drop lock so we can take kctx mutexes */
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+
+				js_kctx_info = &kctx->jctx.sched_info;
+
+				mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+				mutex_lock(&js_devdata->runpool_mutex);
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+				if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+					kbase_js_ctx_pullable(kctx, js, false))
+					timer_sync =
+						kbase_js_ctx_list_add_pullable_nolock(
+								kbdev, kctx, js);
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+				if (timer_sync)
+					kbase_backend_ctx_count_changed(kbdev);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+				/* Take lock before accessing list again */
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			}
 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-			if (timer_sync)
-				kbase_backend_ctx_count_changed(kbdev);
-			mutex_unlock(&js_devdata->runpool_mutex);
-			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 		}
 	}
 	mutex_unlock(&js_devdata->queue_mutex);
@@ -2119,6 +2222,7 @@
 	}
 
 	kbase_ctx_flag_set(kctx, KCTX_PULLED);
+	kbase_ctx_flag_set(kctx, (KCTX_PULLED_SINCE_ACTIVE_JS0 << js));
 
 	pulled = atomic_inc_return(&kctx->atoms_pulled);
 	if (pulled == 1 && !kctx->slots_pullable) {
@@ -2155,7 +2259,6 @@
 	bool context_idle = false;
 	unsigned long flags;
 	base_jd_core_req core_req = katom->core_req;
-	u64 affinity = katom->affinity;
 	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
 
 	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom);
@@ -2246,8 +2349,7 @@
 
 	kbase_js_sched_all(kbdev);
 
-	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
-			coreref_state);
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, coreref_state);
 }
 
 void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
@@ -2420,20 +2522,22 @@
 void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 {
 	struct kbasep_js_device_data *js_devdata;
-	struct kbase_context *last_active;
+	struct kbase_context *last_active[BASE_JM_MAX_NR_SLOTS];
 	bool timer_sync = false;
-	bool ctx_waiting = false;
+	bool ctx_waiting[BASE_JM_MAX_NR_SLOTS];
+	int js;
 
 	js_devdata = &kbdev->js_data;
 
 	down(&js_devdata->schedule_sem);
 	mutex_lock(&js_devdata->queue_mutex);
 
-	last_active = kbdev->hwaccess.active_kctx;
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		last_active[js] = kbdev->hwaccess.active_kctx[js];
+		ctx_waiting[js] = false;
+	}
 
 	while (js_mask) {
-		int js;
-
 		js = ffs(js_mask) - 1;
 
 		while (1) {
@@ -2470,7 +2574,7 @@
 				kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
 			}
 
-			if (!kbase_js_use_ctx(kbdev, kctx)) {
+			if (!kbase_js_use_ctx(kbdev, kctx, js)) {
 				mutex_lock(
 					&kctx->jctx.sched_info.ctx.jsctx_mutex);
 				/* Context can not be used at this time */
@@ -2515,7 +2619,10 @@
 				 * Unless this context is already 'active', in
 				 * which case it's effectively already scheduled
 				 * so push it to the back of the list. */
-				if (pullable && kctx == last_active)
+				if (pullable && kctx == last_active[js] &&
+						kbase_ctx_flag(kctx,
+						(KCTX_PULLED_SINCE_ACTIVE_JS0 <<
+						js)))
 					timer_sync |=
 					kbase_js_ctx_list_add_pullable_nolock(
 							kctx->kbdev,
@@ -2537,10 +2644,10 @@
 				 * marker to prevent it from submitting atoms in
 				 * the IRQ handler, which would prevent this
 				 * context from making progress. */
-				if (last_active && kctx != last_active &&
-						kbase_js_ctx_pullable(
-						last_active, js, true))
-					ctx_waiting = true;
+				if (last_active[js] && kctx != last_active[js]
+						&& kbase_js_ctx_pullable(
+						last_active[js], js, true))
+					ctx_waiting[js] = true;
 
 				if (context_idle) {
 					kbase_jm_idle_ctx(kbdev, kctx);
@@ -2580,8 +2687,11 @@
 	if (timer_sync)
 		kbase_js_sync_timers(kbdev);
 
-	if (kbdev->hwaccess.active_kctx == last_active && ctx_waiting)
-		kbdev->hwaccess.active_kctx = NULL;
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		if (kbdev->hwaccess.active_kctx[js] == last_active[js] &&
+				ctx_waiting[js])
+			kbdev->hwaccess.active_kctx[js] = NULL;
+	}
 
 	mutex_unlock(&js_devdata->queue_mutex);
 	up(&js_devdata->schedule_sem);
@@ -2647,12 +2757,16 @@
 	 *    handled when it leaves the runpool.
 	 */
 	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
 			if (!list_empty(
 				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
 				list_del_init(
 				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
 		}
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 		/* The following events require us to kill off remaining jobs
 		 * and update PM book-keeping:
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h
index aa930b9..355da27 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -596,6 +596,27 @@
  */
 void kbase_js_set_timeouts(struct kbase_device *kbdev);
 
+/**
+ * kbase_js_set_ctx_priority - set the context priority
+ * @kctx: Context pointer
+ * @new_priority: New priority value for the Context
+ *
+ * The context priority is set to a new value and it is moved to the
+ * pullable/unpullable list as per the new priority.
+ */
+void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority);
+
+
+/**
+ * kbase_js_update_ctx_priority - update the context priority
+ * @kctx: Context pointer
+ *
+ * The context priority gets updated as per the priority of the atoms currently
+ * in use for that context, but only if the system priority mode for context
+ * scheduling is in use.
+ */
+void kbase_js_update_ctx_priority(struct kbase_context *kctx);
+
 /*
  * Helpers follow
  */
@@ -641,7 +662,8 @@
 
 	set_bit = (u16) (1u << kctx->as_nr);
 
-	dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+	dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)",
+			kctx, kctx->as_nr);
 
 	js_devdata->runpool_irq.submit_allowed |= set_bit;
 }
@@ -666,7 +688,8 @@
 	clear_bit = (u16) (1u << kctx->as_nr);
 	clear_mask = ~clear_bit;
 
-	dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+	dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)",
+			kctx, kctx->as_nr);
 
 	js_devdata->runpool_irq.submit_allowed &= clear_mask;
 }
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
index a54b6f3..7385daa 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -146,6 +146,48 @@
 /** Combination of KBASE_JS_ATOM_DONE_<...> bits */
 typedef u32 kbasep_js_atom_done_code;
 
+/*
+ * Context scheduling mode defines for kbase_device::js_ctx_scheduling_mode
+ */
+enum {
+	/*
+	 * In this mode, the context containing higher priority atoms will be
+	 * scheduled first, and new runnable higher priority atoms can preempt
+	 * lower priority atoms currently running on the GPU, even if they
+	 * belong to a different context.
+	 */
+	KBASE_JS_SYSTEM_PRIORITY_MODE = 0,
+
+	/*
+	 * In this mode, the contexts are scheduled in round-robin fashion, and
+	 * new runnable higher priority atoms can preempt lower priority atoms
+	 * currently running on the GPU, but only if they belong to the same
+	 * context.
+	 */
+	KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE,
+
+	/* Must be the last in the enum */
+	KBASE_JS_PRIORITY_MODE_COUNT,
+};
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_MED,
+	KBASE_JS_ATOM_SCHED_PRIO_LOW,
+	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace.
+ */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
 /**
  * @brief KBase Device Data Job Scheduler sub-structure
  *
@@ -229,12 +271,12 @@
 	/**
 	 * List of contexts that can currently be pulled from
 	 */
-	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
+	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
 	/**
 	 * List of contexts that can not currently be pulled from, but have
 	 * jobs currently running.
 	 */
-	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];
+	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
 
 	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
 	s8 nr_user_contexts_running;
@@ -365,22 +407,6 @@
  */
 #define KBASEP_JS_TICK_RESOLUTION_US 1
 
-/*
- * Internal atom priority defines for kbase_jd_atom::sched_prio
- */
-enum {
-	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
-	KBASE_JS_ATOM_SCHED_PRIO_MED,
-	KBASE_JS_ATOM_SCHED_PRIO_LOW,
-	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
-};
-
-/* Invalid priority for kbase_jd_atom::sched_prio */
-#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
-
-/* Default priority in the case of contexts with no atoms, or being lenient
- * about invalid priorities from userspace */
-#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
 
 	  /** @} *//* end group kbase_js */
 	  /** @} *//* end group base_kbase_api */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c
index a0897fb..3eff83a 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,9 +29,6 @@
 #ifdef CONFIG_DMA_SHARED_BUFFER
 #include <linux/dma-buf.h>
 #endif				/* CONFIG_DMA_SHARED_BUFFER */
-#ifdef CONFIG_UMP
-#include <linux/ump.h>
-#endif				/* CONFIG_UMP */
 #include <linux/kernel.h>
 #include <linux/bug.h>
 #include <linux/compat.h>
@@ -45,29 +42,34 @@
 #include <mali_kbase_hw.h>
 #include <mali_kbase_tlstream.h>
 
-/* This function finds out which RB tree the given GPU VA region belongs to
- * based on the region zone */
-static struct rb_root *kbase_reg_flags_to_rbtree(struct kbase_context *kctx,
-						    struct kbase_va_region *reg)
+/* Forward declarations */
+static void free_partial_locked(struct kbase_context *kctx,
+		struct kbase_mem_pool *pool, struct tagged_addr tp);
+
+static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx)
 {
-	struct rb_root *rbtree = NULL;
+#if defined(CONFIG_ARM64)
+	/* VA_BITS can be as high as 48 bits, but all bits are available for
+	 * both user and kernel.
+	 */
+	size_t cpu_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	/* x86_64 can access 48 bits of VA, but the 48th is used to denote
+	 * kernel (1) vs userspace (0), so the max here is 47.
+	 */
+	size_t cpu_va_bits = 47;
+#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32)
+	size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE;
+#else
+#error "Unknown CPU VA width for this architecture"
+#endif
 
-	switch (reg->flags & KBASE_REG_ZONE_MASK) {
-	case KBASE_REG_ZONE_CUSTOM_VA:
-		rbtree = &kctx->reg_rbtree_custom;
-		break;
-	case KBASE_REG_ZONE_EXEC:
-		rbtree = &kctx->reg_rbtree_exec;
-		break;
-	case KBASE_REG_ZONE_SAME_VA:
-		rbtree = &kctx->reg_rbtree_same;
-		/* fall through */
-	default:
-		rbtree = &kctx->reg_rbtree_same;
-		break;
-	}
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		cpu_va_bits = 32;
+#endif
 
-	return rbtree;
+	return cpu_va_bits;
 }
 
 /* This function finds out which RB tree the given pfn from the GPU VA belongs
@@ -82,8 +84,6 @@
 #endif /* CONFIG_64BIT */
 		if (gpu_pfn >= KBASE_REG_ZONE_CUSTOM_VA_BASE)
 			rbtree = &kctx->reg_rbtree_custom;
-		else if (gpu_pfn >= KBASE_REG_ZONE_EXEC_BASE)
-			rbtree = &kctx->reg_rbtree_exec;
 		else
 			rbtree = &kctx->reg_rbtree_same;
 #ifdef CONFIG_64BIT
@@ -99,15 +99,14 @@
 }
 
 /* This function inserts a region into the tree. */
-static void kbase_region_tracker_insert(struct kbase_context *kctx,
-						struct kbase_va_region *new_reg)
+static void kbase_region_tracker_insert(struct kbase_va_region *new_reg)
 {
 	u64 start_pfn = new_reg->start_pfn;
 	struct rb_node **link = NULL;
 	struct rb_node *parent = NULL;
 	struct rb_root *rbtree = NULL;
 
-	rbtree = kbase_reg_flags_to_rbtree(kctx, new_reg);
+	rbtree = new_reg->rbtree;
 
 	link = &(rbtree->rb_node);
 	/* Find the right place in the tree using tree search */
@@ -132,18 +131,13 @@
 	rb_insert_color(&(new_reg->rblink), rbtree);
 }
 
-/* Find allocated region enclosing free range. */
-static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(
-		struct kbase_context *kctx, u64 start_pfn, size_t nr_pages)
+static struct kbase_va_region *find_region_enclosing_range_rbtree(
+		struct rb_root *rbtree, u64 start_pfn, size_t nr_pages)
 {
-	struct rb_node *rbnode = NULL;
-	struct kbase_va_region *reg = NULL;
-	struct rb_root *rbtree = NULL;
-
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
 	u64 end_pfn = start_pfn + nr_pages;
 
-	rbtree = kbase_gpu_va_to_rbtree(kctx, start_pfn);
-
 	rbnode = rbtree->rb_node;
 
 	while (rbnode) {
@@ -166,19 +160,12 @@
 	return NULL;
 }
 
-/* Find region enclosing given address. */
-struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr)
+struct kbase_va_region *kbase_find_region_enclosing_address(
+		struct rb_root *rbtree, u64 gpu_addr)
 {
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
 	struct rb_node *rbnode;
 	struct kbase_va_region *reg;
-	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
-	struct rb_root *rbtree = NULL;
-
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-
-	lockdep_assert_held(&kctx->reg_lock);
-
-	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
 
 	rbnode = rbtree->rb_node;
 
@@ -202,14 +189,11 @@
 	return NULL;
 }
 
-KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
-
-/* Find region with given base address */
-struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr)
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(
+		struct kbase_context *kctx, u64 gpu_addr)
 {
 	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
-	struct rb_node *rbnode = NULL;
-	struct kbase_va_region *reg = NULL;
 	struct rb_root *rbtree = NULL;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
@@ -218,6 +202,18 @@
 
 	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
 
+	return kbase_find_region_enclosing_address(rbtree, gpu_addr);
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+struct kbase_va_region *kbase_find_region_base_address(
+		struct rb_root *rbtree, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+
 	rbnode = rbtree->rb_node;
 
 	while (rbnode) {
@@ -234,11 +230,25 @@
 	return NULL;
 }
 
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_root *rbtree = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	return kbase_find_region_base_address(rbtree, gpu_addr);
+}
+
 KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
 
 /* Find region meeting given requirements */
 static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
-		struct kbase_context *kctx, struct kbase_va_region *reg_reqs,
+		struct kbase_va_region *reg_reqs,
 		size_t nr_pages, size_t align_offset, size_t align_mask,
 		u64 *out_start_pfn)
 {
@@ -248,11 +258,9 @@
 
 	/* Note that this search is a linear search, as we do not have a target
 	   address in mind, so does not benefit from the rbtree search */
-	rbtree = kbase_reg_flags_to_rbtree(kctx, reg_reqs);
+	rbtree = reg_reqs->rbtree;
 
-	rbnode = rb_first(rbtree);
-
-	while (rbnode) {
+	for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) {
 		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
 		if ((reg->nr_pages >= nr_pages) &&
 				(reg->flags & KBASE_REG_FREE)) {
@@ -268,6 +276,27 @@
 			start_pfn += align_mask;
 			start_pfn -= (start_pfn - align_offset) & (align_mask);
 
+			if (!(reg_reqs->flags & KBASE_REG_GPU_NX)) {
+				/* Can't end at 4GB boundary */
+				if (0 == ((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB))
+					start_pfn += align_offset;
+
+				/* Can't start at 4GB boundary */
+				if (0 == (start_pfn & BASE_MEM_PFN_MASK_4GB))
+					start_pfn += align_offset;
+
+				if (!((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB) ||
+				    !(start_pfn & BASE_MEM_PFN_MASK_4GB))
+					continue;
+			} else if (reg_reqs->flags &
+					KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
+				u64 end_pfn = start_pfn + nr_pages - 1;
+
+				if ((start_pfn & ~BASE_MEM_PFN_MASK_4GB) !=
+				    (end_pfn & ~BASE_MEM_PFN_MASK_4GB))
+					start_pfn = end_pfn & ~BASE_MEM_PFN_MASK_4GB;
+			}
+
 			if ((start_pfn >= reg->start_pfn) &&
 					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
 					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) {
@@ -275,7 +304,6 @@
 				return reg;
 			}
 		}
-		rbnode = rb_next(rbnode);
 	}
 
 	return NULL;
@@ -289,7 +317,7 @@
  * region lock held. The associated memory is not released (see
  * kbase_free_alloced_region). Internal use only.
  */
-static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+int kbase_remove_va_region(struct kbase_va_region *reg)
 {
 	struct rb_node *rbprev;
 	struct kbase_va_region *prev = NULL;
@@ -301,7 +329,7 @@
 	int merged_back = 0;
 	int err = 0;
 
-	reg_rbtree = kbase_reg_flags_to_rbtree(kctx, reg);
+	reg_rbtree = reg->rbtree;
 
 	/* Try to merge with the previous block first */
 	rbprev = rb_prev(&(reg->rblink));
@@ -347,7 +375,9 @@
 		 */
 		struct kbase_va_region *free_reg;
 
-		free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+		free_reg = kbase_alloc_free_region(reg_rbtree,
+				reg->start_pfn, reg->nr_pages,
+				reg->flags & KBASE_REG_ZONE_MASK);
 		if (!free_reg) {
 			err = -ENOMEM;
 			goto out;
@@ -362,14 +392,21 @@
 KBASE_EXPORT_TEST_API(kbase_remove_va_region);
 
 /**
- * @brief Insert a VA region to the list, replacing the current at_reg.
+ * kbase_insert_va_region_nolock - Insert a VA region into the list,
+ * replacing the existing region at_reg.
+ *
+ * @new_reg: The new region to insert
+ * @at_reg: The region to replace
+ * @start_pfn: The Page Frame Number to insert at
+ * @nr_pages: The number of pages of the region
  */
-static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
+		struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
 {
 	struct rb_root *reg_rbtree = NULL;
 	int err = 0;
 
-	reg_rbtree = kbase_reg_flags_to_rbtree(kctx, at_reg);
+	reg_rbtree = at_reg->rbtree;
 
 	/* Must be a free region */
 	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
@@ -393,19 +430,19 @@
 		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
 		at_reg->nr_pages -= nr_pages;
 
-		kbase_region_tracker_insert(kctx, new_reg);
+		kbase_region_tracker_insert(new_reg);
 	}
 	/* New region replaces the end of the old one, so insert after. */
 	else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
 		at_reg->nr_pages -= nr_pages;
 
-		kbase_region_tracker_insert(kctx, new_reg);
+		kbase_region_tracker_insert(new_reg);
 	}
 	/* New region splits the old one, so insert and create new */
 	else {
 		struct kbase_va_region *new_front_reg;
 
-		new_front_reg = kbase_alloc_free_region(kctx,
+		new_front_reg = kbase_alloc_free_region(reg_rbtree,
 				at_reg->start_pfn,
 				start_pfn - at_reg->start_pfn,
 				at_reg->flags & KBASE_REG_ZONE_MASK);
@@ -414,8 +451,8 @@
 			at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
 			at_reg->start_pfn = start_pfn + nr_pages;
 
-			kbase_region_tracker_insert(kctx, new_front_reg);
-			kbase_region_tracker_insert(kctx, new_reg);
+			kbase_region_tracker_insert(new_front_reg);
+			kbase_region_tracker_insert(new_reg);
 		} else {
 			err = -ENOMEM;
 		}
@@ -425,83 +462,44 @@
 }
 
 /**
- * @brief Add a VA region to the list.
+ * kbase_add_va_region - Add a VA region to the region list for a context.
+ *
+ * @kctx: kbase context containing the region
+ * @reg: the region to add
+ * @addr: the address to insert the region at
+ * @nr_pages: the number of pages in the region
+ * @align: the minimum alignment in pages
  */
 int kbase_add_va_region(struct kbase_context *kctx,
 		struct kbase_va_region *reg, u64 addr,
 		size_t nr_pages, size_t align)
 {
-	struct kbase_va_region *tmp;
-	u64 gpu_pfn = addr >> PAGE_SHIFT;
 	int err = 0;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int cpu_va_bits = kbase_get_num_cpu_va_bits(kctx);
+	int gpu_pc_bits =
+		kbdev->gpu_props.props.core_props.log2_program_counter_size;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
 	KBASE_DEBUG_ASSERT(NULL != reg);
 
 	lockdep_assert_held(&kctx->reg_lock);
 
-	if (!align)
-		align = 1;
-
-	/* must be a power of 2 */
-	KBASE_DEBUG_ASSERT(is_power_of_2(align));
-	KBASE_DEBUG_ASSERT(nr_pages > 0);
-
-	/* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
-	if (gpu_pfn) {
-		struct device *dev = kctx->kbdev->dev;
-
-		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
-
-		tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages);
-		if (!tmp) {
-			dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
-			err = -ENOMEM;
-			goto exit;
+	/* The executable allocation from the SAME_VA zone would already have an
+	 * appropriately aligned GPU VA chosen for it.
+	 */
+	if (!(reg->flags & KBASE_REG_GPU_NX) && !addr) {
+		if (cpu_va_bits > gpu_pc_bits) {
+			align = max(align, (size_t)((1ULL << gpu_pc_bits)
+						>> PAGE_SHIFT));
 		}
-		if (!(tmp->flags & KBASE_REG_FREE)) {
-			dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK);
-			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
-			dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align);
-			err = -ENOMEM;
-			goto exit;
-		}
-
-		err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages);
-		if (err) {
-			dev_warn(dev, "Failed to insert va region");
-			err = -ENOMEM;
-			goto exit;
-		}
-
-		goto exit;
 	}
 
-	/* Path 2: Map any free address which meets the requirements.
-	 *
-	 * Depending on the zone the allocation request is for
-	 * we might need to retry it. */
 	do {
-		u64 start_pfn;
-		size_t align_offset = align;
-		size_t align_mask = align - 1;
-
-		if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) {
-			WARN(align > 1,
-					"kbase_add_va_region with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory",
-					(unsigned long)align);
-			align_mask  = reg->extent - 1;
-			align_offset = reg->extent - reg->initial_commit;
-		}
-
-		tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg,
-				nr_pages, align_offset, align_mask,
-				&start_pfn);
-		if (tmp) {
-			err = kbase_insert_va_region_nolock(kctx, reg, tmp,
-					start_pfn, nr_pages);
+		err = kbase_add_va_region_rbtree(kbdev, reg, addr, nr_pages,
+				align);
+		if (err != -ENOMEM)
 			break;
-		}
 
 		/*
 		 * If the allocation is not from the same zone as JIT
@@ -513,36 +511,118 @@
 			break;
 	} while (kbase_jit_evict(kctx));
 
-	if (!tmp)
-		err = -ENOMEM;
-
- exit:
 	return err;
 }
 
 KBASE_EXPORT_TEST_API(kbase_add_va_region);
 
 /**
+ * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree
+ * @kbdev: The kbase device
+ * @reg: The region to add
+ * @addr: The address to add the region at, or 0 to map at any available address
+ * @nr_pages: The size of the region in pages
+ * @align: The minimum alignment in pages
+ *
+ * Insert a region into the rbtree that was specified when the region was
+ * created. If addr is 0, a free area in the rbtree is used; otherwise the
+ * specified address is used.
+ */
+int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
+		struct kbase_va_region *reg,
+		u64 addr, size_t nr_pages, size_t align)
+{
+	struct rb_root *rbtree = NULL;
+	struct kbase_va_region *tmp;
+	u64 gpu_pfn = addr >> PAGE_SHIFT;
+	int err = 0;
+
+	rbtree = reg->rbtree;
+
+	if (!align)
+		align = 1;
+
+	/* must be a power of 2 */
+	KBASE_DEBUG_ASSERT(is_power_of_2(align));
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+	/* Path 1: Map a specific address. Find the enclosing region,
+	 * which *must* be free.
+	 */
+	if (gpu_pfn) {
+		struct device *dev = kbdev->dev;
+
+		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+		tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn,
+				nr_pages);
+		if (!tmp) {
+			dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		}
+		if (!(tmp->flags & KBASE_REG_FREE)) {
+			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n",
+					tmp->start_pfn, tmp->flags,
+					tmp->nr_pages, gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn,
+				nr_pages);
+		if (err) {
+			dev_warn(dev, "Failed to insert va region");
+			err = -ENOMEM;
+		}
+	} else {
+		/* Path 2: Map any free address which meets the requirements. */
+		u64 start_pfn;
+		size_t align_offset = align;
+		size_t align_mask = align - 1;
+
+		if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) {
+			WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory",
+					__func__,
+					(unsigned long)align);
+			align_mask  = reg->extent - 1;
+			align_offset = reg->extent - reg->initial_commit;
+		}
+
+		tmp = kbase_region_tracker_find_region_meeting_reqs(reg,
+				nr_pages, align_offset, align_mask,
+				&start_pfn);
+		if (tmp) {
+			err = kbase_insert_va_region_nolock(reg, tmp,
+							start_pfn, nr_pages);
+		} else {
+			err = -ENOMEM;
+		}
+	}
+
+exit:
+	return err;
+}
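
Since regions now carry a backlink to their rbtree instead of a kbase_context, kbase_add_va_region_rbtree() can be driven directly once a region has been created against a particular tree. The sketch below is illustrative only (the helper name example_reserve_custom_va is hypothetical, and locking and error handling are assumed to be done by the caller); in this patch the real caller is kbase_add_va_region(), which additionally retries with kbase_jit_evict() on -ENOMEM.

/* Illustrative sketch, not part of the patch: reserve nr_pages of GPU VA
 * from a context's custom VA rbtree. Assumes kctx->reg_lock is held.
 */
static int example_reserve_custom_va(struct kbase_context *kctx,
		size_t nr_pages)
{
	struct kbase_va_region *reg;
	int err;

	reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, 0,
			nr_pages, KBASE_REG_ZONE_CUSTOM_VA);
	if (!reg)
		return -ENOMEM;

	/* addr == 0 asks for any free area; align is in pages */
	err = kbase_add_va_region_rbtree(kctx->kbdev, reg, 0, nr_pages, 1);
	if (err)
		kbase_free_alloced_region(reg);

	return err;
}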
+
+/**
  * @brief Initialize the internal region tracker data structure.
  */
 static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
 		struct kbase_va_region *same_va_reg,
-		struct kbase_va_region *exec_reg,
 		struct kbase_va_region *custom_va_reg)
 {
 	kctx->reg_rbtree_same = RB_ROOT;
-	kbase_region_tracker_insert(kctx, same_va_reg);
+	kbase_region_tracker_insert(same_va_reg);
 
-	/* Although exec and custom_va_reg don't always exist,
+	/* Although custom_va_reg doesn't always exist,
 	 * initialize unconditionally because of the mem_view debugfs
-	 * implementation which relies on these being empty */
-	kctx->reg_rbtree_exec = RB_ROOT;
+	 * implementation which relies on this being empty.
+	 */
 	kctx->reg_rbtree_custom = RB_ROOT;
 
-	if (exec_reg)
-		kbase_region_tracker_insert(kctx, exec_reg);
 	if (custom_va_reg)
-		kbase_region_tracker_insert(kctx, custom_va_reg);
+		kbase_region_tracker_insert(custom_va_reg);
 }
 
 static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
@@ -563,19 +643,28 @@
 void kbase_region_tracker_term(struct kbase_context *kctx)
 {
 	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
-	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
 	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
 }
 
+void kbase_region_tracker_term_rbtree(struct rb_root *rbtree)
+{
+	kbase_region_tracker_erase_rbtree(rbtree);
+}
+
+static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
+{
+	return min(kbase_get_num_cpu_va_bits(kctx),
+			(size_t) kctx->kbdev->gpu_props.mmu.va_bits);
+}
+
 /**
  * Initialize the region tracker data structure.
  */
 int kbase_region_tracker_init(struct kbase_context *kctx)
 {
 	struct kbase_va_region *same_va_reg;
-	struct kbase_va_region *exec_reg = NULL;
 	struct kbase_va_region *custom_va_reg = NULL;
-	size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
+	size_t same_va_bits = kbase_get_same_va_bits(kctx);
 	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
 	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
 	u64 same_va_pages;
@@ -584,29 +673,9 @@
 	/* Take the lock as kbase_free_alloced_region requires it */
 	kbase_gpu_vm_lock(kctx);
 
-#if defined(CONFIG_ARM64)
-	same_va_bits = VA_BITS;
-#elif defined(CONFIG_X86_64)
-	same_va_bits = 47;
-#elif defined(CONFIG_64BIT)
-#error Unsupported 64-bit architecture
-#endif
-
-#ifdef CONFIG_64BIT
-	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
-		same_va_bits = 32;
-	else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
-		same_va_bits = 33;
-#endif
-
-	if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) {
-		err = -EINVAL;
-		goto fail_unlock;
-	}
-
 	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
 	/* all have SAME_VA */
-	same_va_reg = kbase_alloc_free_region(kctx, 1,
+	same_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 1,
 			same_va_pages,
 			KBASE_REG_ZONE_SAME_VA);
 
@@ -616,7 +685,7 @@
 	}
 
 #ifdef CONFIG_64BIT
-	/* 32-bit clients have exec and custom VA zones */
+	/* 32-bit clients have custom VA zones */
 	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
 #endif
 		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
@@ -630,37 +699,27 @@
 		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
 			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
 
-		exec_reg = kbase_alloc_free_region(kctx,
-				KBASE_REG_ZONE_EXEC_BASE,
-				KBASE_REG_ZONE_EXEC_SIZE,
-				KBASE_REG_ZONE_EXEC);
-
-		if (!exec_reg) {
-			err = -ENOMEM;
-			goto fail_free_same_va;
-		}
-
-		custom_va_reg = kbase_alloc_free_region(kctx,
+		custom_va_reg = kbase_alloc_free_region(
+				&kctx->reg_rbtree_custom,
 				KBASE_REG_ZONE_CUSTOM_VA_BASE,
 				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
 
 		if (!custom_va_reg) {
 			err = -ENOMEM;
-			goto fail_free_exec;
+			goto fail_free_same_va;
 		}
 #ifdef CONFIG_64BIT
 	}
 #endif
 
-	kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
+	kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg);
 
 	kctx->same_va_end = same_va_pages + 1;
 
+
 	kbase_gpu_vm_unlock(kctx);
 	return 0;
 
-fail_free_exec:
-	kbase_free_alloced_region(exec_reg);
 fail_free_same_va:
 	kbase_free_alloced_region(same_va_reg);
 fail_unlock:
@@ -668,33 +727,16 @@
 	return err;
 }
 
-int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
-{
 #ifdef CONFIG_64BIT
+static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
+		u64 jit_va_pages)
+{
 	struct kbase_va_region *same_va;
 	struct kbase_va_region *custom_va_reg;
-	u64 same_va_bits;
+	u64 same_va_bits = kbase_get_same_va_bits(kctx);
 	u64 total_va_size;
 	int err;
 
-	/*
-	 * Nothing to do for 32-bit clients, JIT uses the existing
-	 * custom VA zone.
-	 */
-	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
-		return 0;
-
-#if defined(CONFIG_ARM64)
-	same_va_bits = VA_BITS;
-#elif defined(CONFIG_X86_64)
-	same_va_bits = 47;
-#elif defined(CONFIG_64BIT)
-#error Unsupported 64-bit architecture
-#endif
-
-	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
-		same_va_bits = 33;
-
 	total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
 
 	kbase_gpu_vm_lock(kctx);
@@ -732,7 +774,7 @@
 	 * Create a custom VA zone at the end of the VA for allocations which
 	 * JIT can use so it doesn't have to allocate VA from the kernel.
 	 */
-	custom_va_reg = kbase_alloc_free_region(kctx,
+	custom_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
 				kctx->same_va_end,
 				jit_va_pages,
 				KBASE_REG_ZONE_CUSTOM_VA);
@@ -746,7 +788,7 @@
 		goto fail_unlock;
 	}
 
-	kbase_region_tracker_insert(kctx, custom_va_reg);
+	kbase_region_tracker_insert(custom_va_reg);
 
 	kbase_gpu_vm_unlock(kctx);
 	return 0;
@@ -754,10 +796,29 @@
 fail_unlock:
 	kbase_gpu_vm_unlock(kctx);
 	return err;
-#else
-	return 0;
-#endif
 }
+#endif
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
+		u8 max_allocations, u8 trim_level)
+{
+	if (trim_level > 100)
+		return -EINVAL;
+
+	kctx->jit_max_allocations = max_allocations;
+	kctx->trim_level = trim_level;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
+#endif
+	/*
+	 * Nothing to do for 32-bit clients, JIT uses the existing
+	 * custom VA zone.
+	 */
+	return 0;
+}
+
 
 int kbase_mem_init(struct kbase_device *kbdev)
 {
@@ -824,14 +885,15 @@
  * The allocated object is not part of any list yet, and is flagged as
  * KBASE_REG_FREE. No mapping is allocated yet.
  *
- * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC
+ * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA.
  *
  */
-struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
+struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
+		u64 start_pfn, size_t nr_pages, int zone)
 {
 	struct kbase_va_region *new_reg;
 
-	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(rbtree != NULL);
 
 	/* zone argument should only contain zone related region flags */
 	KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
@@ -846,7 +908,7 @@
 
 	new_reg->cpu_alloc = NULL; /* no alloc bound yet */
 	new_reg->gpu_alloc = NULL; /* no alloc bound yet */
-	new_reg->kctx = kctx;
+	new_reg->rbtree = rbtree;
 	new_reg->flags = zone | KBASE_REG_FREE;
 
 	new_reg->flags |= KBASE_REG_GROWABLE;
@@ -861,6 +923,29 @@
 
 KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
 
+static struct kbase_context *kbase_reg_flags_to_kctx(
+		struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = NULL;
+	struct rb_root *rbtree = reg->rbtree;
+
+	switch (reg->flags & KBASE_REG_ZONE_MASK) {
+	case KBASE_REG_ZONE_CUSTOM_VA:
+		kctx = container_of(rbtree, struct kbase_context,
+				reg_rbtree_custom);
+		break;
+	case KBASE_REG_ZONE_SAME_VA:
+		kctx = container_of(rbtree, struct kbase_context,
+				reg_rbtree_same);
+		break;
+	default:
+		WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
+		break;
+	}
+
+	return kctx;
+}
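
With the reg->kctx backlink gone, later hunks of this patch (kbase_free_alloced_region() and kbase_jit_backing_lost()) recover the owning context through this helper. A minimal sketch of the pattern follows; example_reg_to_kbdev() is a hypothetical name used only for illustration.

/* Illustrative only: map a context-zone region back to its kbase_device.
 * Returns NULL (after a WARN) if the region's zone is not recognised.
 */
static struct kbase_device *example_reg_to_kbdev(struct kbase_va_region *reg)
{
	struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);

	if (WARN_ON(!kctx))
		return NULL;

	return kctx->kbdev;
}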
+
 /**
  * @brief Free a region object.
  *
@@ -874,6 +959,14 @@
 void kbase_free_alloced_region(struct kbase_va_region *reg)
 {
 	if (!(reg->flags & KBASE_REG_FREE)) {
+		struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
+
+		if (WARN_ON(!kctx))
+			return;
+
+
+		mutex_lock(&kctx->jit_evict_lock);
+
 		/*
 		 * The physical allocation should have been removed from the
 		 * eviction list before this function is called. However, in the
@@ -882,6 +975,8 @@
 		 * on the list at termination time of the region tracker.
 		 */
 		if (!list_empty(&reg->gpu_alloc->evict_node)) {
+			mutex_unlock(&kctx->jit_evict_lock);
+
 			/*
 			 * Unlink the physical allocation before unmaking it
 			 * evictable so that the allocation isn't grown back to
@@ -904,13 +999,15 @@
 						   KBASE_MEM_TYPE_NATIVE);
 				kbase_mem_evictable_unmake(reg->gpu_alloc);
 			}
+		} else {
+			mutex_unlock(&kctx->jit_evict_lock);
 		}
 
 		/*
 		 * Remove the region from the sticky resource metadata
 		 * list should it be there.
 		 */
-		kbase_sticky_resource_release(reg->kctx, NULL,
+		kbase_sticky_resource_release(kctx, NULL,
 				reg->start_pfn << PAGE_SHIFT);
 
 		kbase_mem_phy_alloc_put(reg->cpu_alloc);
@@ -958,11 +1055,13 @@
 		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
 		for (i = 0; i < alloc->imported.alias.nents; i++) {
 			if (alloc->imported.alias.aliased[i].alloc) {
-				err = kbase_mmu_insert_pages(kctx,
+				err = kbase_mmu_insert_pages(kctx->kbdev,
+						&kctx->mmu,
 						reg->start_pfn + (i * stride),
 						alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
 						alloc->imported.alias.aliased[i].length,
-						reg->flags & gwt_mask);
+						reg->flags & gwt_mask,
+						kctx->as_nr);
 				if (err)
 					goto bad_insert;
 
@@ -979,10 +1078,13 @@
 			}
 		}
 	} else {
-		err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+		err = kbase_mmu_insert_pages(kctx->kbdev,
+				&kctx->mmu,
+				reg->start_pfn,
 				kbase_get_gpu_phy_pages(reg),
 				kbase_reg_current_backed_size(reg),
-				reg->flags & gwt_mask);
+				reg->flags & gwt_mask,
+				kctx->as_nr);
 		if (err)
 			goto bad_insert;
 		kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
@@ -998,12 +1100,16 @@
 		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
 		while (i--)
 			if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
-				kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length);
+				kbase_mmu_teardown_pages(kctx->kbdev,
+					&kctx->mmu,
+					reg->start_pfn + (i * stride),
+					reg->gpu_alloc->imported.alias.aliased[i].length,
+					kctx->as_nr);
 				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
 			}
 	}
 
-	kbase_remove_va_region(kctx, reg);
+	kbase_remove_va_region(reg);
 
 	return err;
 }
@@ -1023,13 +1129,16 @@
 	if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
 		size_t i;
 
-		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages);
+		err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+				reg->start_pfn, reg->nr_pages, kctx->as_nr);
 		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
 		for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
 			if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
 				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
 	} else {
-		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg));
+		err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+			reg->start_pfn, kbase_reg_current_backed_size(reg),
+			kctx->as_nr);
 		kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
 	}
 
@@ -1050,7 +1159,7 @@
 	if (err)
 		return err;
 
-	err = kbase_remove_va_region(kctx, reg);
+	err = kbase_remove_va_region(reg);
 	return err;
 }
 
@@ -1332,7 +1441,7 @@
 	lockdep_assert_held(&kctx->reg_lock);
 
 	if (reg->flags & KBASE_REG_JIT) {
-		dev_warn(reg->kctx->kbdev->dev, "Attempt to free JIT memory!\n");
+		dev_warn(kctx->kbdev->dev, "Attempt to free JIT memory!\n");
 		return -EINVAL;
 	}
 
@@ -1359,7 +1468,7 @@
 
 	err = kbase_gpu_munmap(kctx, reg);
 	if (err) {
-		dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+		dev_warn(kctx->kbdev->dev, "Could not unmap from the GPU...\n");
 		goto out;
 	}
 
@@ -1469,7 +1578,8 @@
 		reg->flags |= KBASE_REG_GPU_NX;
 
 	if (!kbase_device_is_cpu_coherent(kctx->kbdev)) {
-		if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED)
+		if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED &&
+				!(flags & BASE_MEM_UNCACHED_GPU))
 			return -EINVAL;
 	} else if (flags & (BASE_MEM_COHERENT_SYSTEM |
 			BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
@@ -1484,8 +1594,20 @@
 	if (flags & BASE_MEM_TILER_ALIGN_TOP)
 		reg->flags |= KBASE_REG_TILER_ALIGN_TOP;
 
+
 	/* Set up default MEMATTR usage */
-	if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
+	if (!(reg->flags & KBASE_REG_GPU_CACHED)) {
+		if (kctx->kbdev->mmu_mode->flags &
+				KBASE_MMU_MODE_HAS_NON_CACHEABLE) {
+			/* Override shareability, and MEMATTR for uncached */
+			reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+			reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
+		} else {
+			dev_warn(kctx->kbdev->dev,
+				"Can't allocate GPU uncached memory due to MMU in Legacy Mode\n");
+			return -EINVAL;
+		}
+	} else if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
 		(reg->flags & KBASE_REG_SHARE_BOTH)) {
 		reg->flags |=
 			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE);
@@ -1494,12 +1616,17 @@
 			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
 	}
 
+	if (flags & BASE_MEM_PERMANENT_KERNEL_MAPPING)
+		reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING;
+
+	if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE)
+		reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE;
+
 	return 0;
 }
 
-int kbase_alloc_phy_pages_helper(
-	struct kbase_mem_phy_alloc *alloc,
-	size_t nr_pages_requested)
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
+		size_t nr_pages_requested)
 {
 	int new_page_count __maybe_unused;
 	size_t nr_left = nr_pages_requested;
@@ -1508,14 +1635,14 @@
 	struct tagged_addr *tp;
 
 	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
-	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
 
 	if (alloc->reg) {
 		if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
 			goto invalid_request;
 	}
 
-	kctx = alloc->imported.kctx;
+	kctx = alloc->imported.native.kctx;
 
 	if (nr_pages_requested == 0)
 		goto done; /*nothing to do*/
@@ -1551,7 +1678,7 @@
 		if (nr_left) {
 			struct kbase_sub_alloc *sa, *temp_sa;
 
-			mutex_lock(&kctx->mem_partials_lock);
+			spin_lock(&kctx->mem_partials_lock);
 
 			list_for_each_entry_safe(sa, temp_sa,
 						 &kctx->mem_partials, link) {
@@ -1574,7 +1701,7 @@
 					}
 				}
 			}
-			mutex_unlock(&kctx->mem_partials_lock);
+			spin_unlock(&kctx->mem_partials_lock);
 		}
 
 		/* only if we actually have a chunk left <512. If more it indicates
@@ -1621,9 +1748,9 @@
 				nr_left = 0;
 
 				/* expose for later use */
-				mutex_lock(&kctx->mem_partials_lock);
+				spin_lock(&kctx->mem_partials_lock);
 				list_add(&sa->link, &kctx->mem_partials);
-				mutex_unlock(&kctx->mem_partials_lock);
+				spin_unlock(&kctx->mem_partials_lock);
 			}
 		}
 	}
@@ -1649,12 +1776,18 @@
 
 alloc_failed:
 	/* rollback needed if got one or more 2MB but failed later */
-	if (nr_left != nr_pages_requested)
-		kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
-				  nr_pages_requested - nr_left,
-				  alloc->pages + alloc->nents,
-				  false,
-				  false);
+	if (nr_left != nr_pages_requested) {
+		size_t nr_pages_to_free = nr_pages_requested - nr_left;
+
+		alloc->nents += nr_pages_to_free;
+
+		kbase_process_page_usage_inc(kctx, nr_pages_to_free);
+		kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages);
+		kbase_atomic_add_pages(nr_pages_to_free,
+			       &kctx->kbdev->memdev.used_pages);
+
+		kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
+	}
 
 	kbase_process_page_usage_dec(kctx, nr_pages_requested);
 	kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
@@ -1665,15 +1798,210 @@
 	return -ENOMEM;
 }
 
+struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
+		struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool,
+		size_t nr_pages_requested,
+		struct kbase_sub_alloc **prealloc_sa)
+{
+	int new_page_count __maybe_unused;
+	size_t nr_left = nr_pages_requested;
+	int res;
+	struct kbase_context *kctx;
+	struct tagged_addr *tp;
+	struct tagged_addr *new_pages = NULL;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
+
+	lockdep_assert_held(&pool->pool_lock);
+
+#if !defined(CONFIG_MALI_2MB_ALLOC)
+	WARN_ON(pool->order);
+#endif
+
+	if (alloc->reg) {
+		if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
+			goto invalid_request;
+	}
+
+	kctx = alloc->imported.native.kctx;
+
+	lockdep_assert_held(&kctx->mem_partials_lock);
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = kbase_atomic_add_pages(
+			nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_add_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer
+	 */
+	kbase_process_page_usage_inc(kctx, nr_pages_requested);
+
+	tp = alloc->pages + alloc->nents;
+	new_pages = tp;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	if (pool->order) {
+		int nr_lp = nr_left / (SZ_2M / SZ_4K);
+
+		res = kbase_mem_pool_alloc_pages_locked(pool,
+						 nr_lp * (SZ_2M / SZ_4K),
+						 tp);
+
+		if (res > 0) {
+			nr_left -= res;
+			tp += res;
+		}
+
+		if (nr_left) {
+			struct kbase_sub_alloc *sa, *temp_sa;
+
+			list_for_each_entry_safe(sa, temp_sa,
+						 &kctx->mem_partials, link) {
+				int pidx = 0;
+
+				while (nr_left) {
+					pidx = find_next_zero_bit(sa->sub_pages,
+								  SZ_2M / SZ_4K,
+								  pidx);
+					bitmap_set(sa->sub_pages, pidx, 1);
+					*tp++ = as_tagged_tag(page_to_phys(
+							sa->page + pidx),
+							FROM_PARTIAL);
+					nr_left--;
+
+					if (bitmap_full(sa->sub_pages,
+							SZ_2M / SZ_4K)) {
+						/* unlink from partial list when
+						 * full
+						 */
+						list_del_init(&sa->link);
+						break;
+					}
+				}
+			}
+		}
+
+		/* Only if we actually have a chunk of fewer than 512 pages
+		 * left. If more remain, it indicates that we couldn't allocate
+		 * a 2MB page above, so there is no point retrying here.
+		 */
+		if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) {
+			/* create a new partial and suballocate the rest from it
+			 */
+			struct page *np = NULL;
+
+			np = kbase_mem_pool_alloc_locked(pool);
+
+			if (np) {
+				int i;
+				struct kbase_sub_alloc *const sa = *prealloc_sa;
+				struct page *p;
+
+				/* store pointers back to the control struct */
+				np->lru.next = (void *)sa;
+				for (p = np; p < np + SZ_2M / SZ_4K; p++)
+					p->lru.prev = (void *)np;
+				INIT_LIST_HEAD(&sa->link);
+				bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K);
+				sa->page = np;
+
+				for (i = 0; i < nr_left; i++)
+					*tp++ = as_tagged_tag(
+							page_to_phys(np + i),
+							FROM_PARTIAL);
+
+				bitmap_set(sa->sub_pages, 0, nr_left);
+				nr_left = 0;
+				/* Indicate to the caller that we'll free this
+				 * memory later.
+				 */
+				*prealloc_sa = NULL;
+
+				/* expose for later use */
+				list_add(&sa->link, &kctx->mem_partials);
+			}
+		}
+		if (nr_left)
+			goto alloc_failed;
+	} else {
+#endif
+		res = kbase_mem_pool_alloc_pages_locked(pool,
+						 nr_left,
+						 tp);
+		if (res <= 0)
+			goto alloc_failed;
+#ifdef CONFIG_MALI_2MB_ALLOC
+	}
+#endif
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return new_pages;
+
+alloc_failed:
+	/* rollback needed if got one or more 2MB but failed later */
+	if (nr_left != nr_pages_requested) {
+		size_t nr_pages_to_free = nr_pages_requested - nr_left;
+
+		struct tagged_addr *start_free = alloc->pages + alloc->nents;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+		if (pool->order) {
+			while (nr_pages_to_free) {
+				if (is_huge_head(*start_free)) {
+					kbase_mem_pool_free_pages_locked(
+						pool, 512,
+						start_free,
+						false, /* not dirty */
+						true); /* return to pool */
+					nr_pages_to_free -= 512;
+					start_free += 512;
+				} else if (is_partial(*start_free)) {
+					free_partial_locked(kctx, pool,
+							*start_free);
+					nr_pages_to_free--;
+					start_free++;
+				}
+			}
+		} else {
+#endif
+			kbase_mem_pool_free_pages_locked(pool,
+					nr_pages_to_free,
+					start_free,
+					false, /* not dirty */
+					true); /* return to pool */
+#ifdef CONFIG_MALI_2MB_ALLOC
+		}
+#endif
+	}
+
+	kbase_process_page_usage_dec(kctx, nr_pages_requested);
+	kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_sub_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+invalid_request:
+	return NULL;
+}
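
The locked allocator above has a strict locking and preallocation protocol, which kbase_jit_grow() later in this patch follows. A condensed sketch of that protocol is shown below; example_alloc_locked() is a hypothetical wrapper, and it assumes the pool was already grown to hold at least delta pages while the locks were dropped.

/* Illustrative sketch of the caller-side protocol for
 * kbase_alloc_phy_pages_helper_locked(). prealloc_sa must be kmalloc'd
 * in advance because the helper may not allocate from the kernel while
 * mem_partials_lock (a spinlock) and the pool lock are held.
 */
static struct tagged_addr *example_alloc_locked(struct kbase_context *kctx,
		struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool,
		size_t delta, struct kbase_sub_alloc **prealloc_sa)
{
	struct tagged_addr *pages;

	spin_lock(&kctx->mem_partials_lock);
	kbase_mem_pool_lock(pool);

	/* Pool is assumed to already contain >= delta pages */
	pages = kbase_alloc_phy_pages_helper_locked(alloc, pool, delta,
			prealloc_sa);

	kbase_mem_pool_unlock(pool);
	spin_unlock(&kctx->mem_partials_lock);

	return pages;
}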
+
 static void free_partial(struct kbase_context *kctx, struct tagged_addr tp)
 {
 	struct page *p, *head_page;
 	struct kbase_sub_alloc *sa;
 
-	p = phys_to_page(as_phys_addr_t(tp));
+	p = as_page(tp);
 	head_page = (struct page *)p->lru.prev;
 	sa = (struct kbase_sub_alloc *)head_page->lru.next;
-	mutex_lock(&kctx->mem_partials_lock);
+	spin_lock(&kctx->mem_partials_lock);
 	clear_bit(p - head_page, sa->sub_pages);
 	if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
 		list_del(&sa->link);
@@ -1684,14 +2012,14 @@
 		/* expose the partial again */
 		list_add(&sa->link, &kctx->mem_partials);
 	}
-	mutex_unlock(&kctx->mem_partials_lock);
+	spin_unlock(&kctx->mem_partials_lock);
 }
 
 int kbase_free_phy_pages_helper(
 	struct kbase_mem_phy_alloc *alloc,
 	size_t nr_pages_to_free)
 {
-	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_context *kctx = alloc->imported.native.kctx;
 	bool syncback;
 	bool reclaimed = (alloc->evicted != 0);
 	struct tagged_addr *start_free;
@@ -1699,7 +2027,7 @@
 	size_t freed = 0;
 
 	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
-	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
 	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
 
 	/* early out if nothing to do */
@@ -1776,6 +2104,124 @@
 	return 0;
 }
 
+static void free_partial_locked(struct kbase_context *kctx,
+		struct kbase_mem_pool *pool, struct tagged_addr tp)
+{
+	struct page *p, *head_page;
+	struct kbase_sub_alloc *sa;
+
+	lockdep_assert_held(&pool->pool_lock);
+	lockdep_assert_held(&kctx->mem_partials_lock);
+
+	p = as_page(tp);
+	head_page = (struct page *)p->lru.prev;
+	sa = (struct kbase_sub_alloc *)head_page->lru.next;
+	clear_bit(p - head_page, sa->sub_pages);
+	if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
+		list_del(&sa->link);
+		kbase_mem_pool_free_locked(pool, head_page, true);
+		kfree(sa);
+	} else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
+		   SZ_2M / SZ_4K - 1) {
+		/* expose the partial again */
+		list_add(&sa->link, &kctx->mem_partials);
+	}
+}
+
+void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
+		struct kbase_mem_pool *pool, struct tagged_addr *pages,
+		size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.native.kctx;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	struct tagged_addr *start_free;
+	size_t freed = 0;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
+	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+	lockdep_assert_held(&pool->pool_lock);
+	lockdep_assert_held(&kctx->mem_partials_lock);
+
+	/* early out if nothing to do */
+	if (!nr_pages_to_free)
+		return;
+
+	start_free = pages;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	/* pad start_free to a valid start location */
+	while (nr_pages_to_free && is_huge(*start_free) &&
+	       !is_huge_head(*start_free)) {
+		nr_pages_to_free--;
+		start_free++;
+	}
+
+	while (nr_pages_to_free) {
+		if (is_huge_head(*start_free)) {
+			/* This is a 2MB entry, so free all the 512 pages that
+			 * it points to
+			 */
+			WARN_ON(!pool->order);
+			kbase_mem_pool_free_pages_locked(pool,
+					512,
+					start_free,
+					syncback,
+					reclaimed);
+			nr_pages_to_free -= 512;
+			start_free += 512;
+			freed += 512;
+		} else if (is_partial(*start_free)) {
+			WARN_ON(!pool->order);
+			free_partial_locked(kctx, pool, *start_free);
+			nr_pages_to_free--;
+			start_free++;
+			freed++;
+		} else {
+			struct tagged_addr *local_end_free;
+
+			WARN_ON(pool->order);
+			local_end_free = start_free;
+			while (nr_pages_to_free &&
+			       !is_huge(*local_end_free) &&
+			       !is_partial(*local_end_free)) {
+				local_end_free++;
+				nr_pages_to_free--;
+			}
+			kbase_mem_pool_free_pages_locked(pool,
+					local_end_free - start_free,
+					start_free,
+					syncback,
+					reclaimed);
+			freed += local_end_free - start_free;
+			start_free += local_end_free - start_free;
+		}
+	}
+
+	alloc->nents -= freed;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		int new_page_count;
+
+		kbase_process_page_usage_dec(kctx, freed);
+		new_page_count = kbase_atomic_sub_pages(freed,
+							&kctx->used_pages);
+		kbase_atomic_sub_pages(freed,
+				       &kctx->kbdev->memdev.used_pages);
+
+		KBASE_TLSTREAM_AUX_PAGESALLOC(
+				kctx->id,
+				(u64)new_page_count);
+	}
+}
+
 void kbase_mem_kref_free(struct kref *kref)
 {
 	struct kbase_mem_phy_alloc *alloc;
@@ -1784,12 +2230,27 @@
 
 	switch (alloc->type) {
 	case KBASE_MEM_TYPE_NATIVE: {
-		WARN_ON(!alloc->imported.kctx);
-		/*
-		 * The physical allocation must have been removed from the
-		 * eviction list before trying to free it.
-		 */
-		WARN_ON(!list_empty(&alloc->evict_node));
+
+		if (!WARN_ON(!alloc->imported.native.kctx)) {
+			if (alloc->permanent_map)
+				kbase_phy_alloc_mapping_term(
+						alloc->imported.native.kctx,
+						alloc);
+
+			/*
+			 * The physical allocation must have been removed from
+			 * the eviction list before trying to free it.
+			 */
+			mutex_lock(
+				&alloc->imported.native.kctx->jit_evict_lock);
+			WARN_ON(!list_empty(&alloc->evict_node));
+			mutex_unlock(
+				&alloc->imported.native.kctx->jit_evict_lock);
+
+			kbase_process_page_usage_dec(
+					alloc->imported.native.kctx,
+					alloc->imported.native.nr_struct_pages);
+		}
 		kbase_free_phy_pages_helper(alloc, alloc->nents);
 		break;
 	}
@@ -1810,11 +2271,6 @@
 	case KBASE_MEM_TYPE_RAW:
 		/* raw pages, external cleanup */
 		break;
- #ifdef CONFIG_UMP
-	case KBASE_MEM_TYPE_IMPORTED_UMP:
-		ump_dd_release(alloc->imported.ump_handle);
-		break;
-#endif
 #ifdef CONFIG_DMA_SHARED_BUFFER
 	case KBASE_MEM_TYPE_IMPORTED_UMM:
 		dma_buf_detach(alloc->imported.umm.dma_buf,
@@ -1827,14 +2283,6 @@
 			mmdrop(alloc->imported.user_buf.mm);
 		kfree(alloc->imported.user_buf.pages);
 		break;
-	case KBASE_MEM_TYPE_TB:{
-		void *tb;
-
-		tb = alloc->imported.kctx->jctx.tb;
-		kbase_device_trace_buffer_uninstall(alloc->imported.kctx);
-		vfree(tb);
-		break;
-	}
 	default:
 		WARN(1, "Unexecpted free of type %d\n", alloc->type);
 		break;
@@ -1913,6 +2361,14 @@
 			BASE_MEM_TILER_ALIGN_TOP)))
 		return false;
 
+	/* Requiring an allocation to lie within a 4GB chunk is needed only for
+	 * TLS memory, which will never be used to contain executable code and
+	 * will also never be used for the Tiler heap.
+	 */
+	if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags &
+			(BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP)))
+		return false;
+
 	/* GPU should have at least read or write access otherwise there is no
 	   reason for allocating. */
 	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
@@ -1983,9 +2439,8 @@
 		return -EINVAL;
 	}
 
-	if (va_pages > (U64_MAX / PAGE_SIZE)) {
-		/* 64-bit address range is the max */
-		dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than 64-bit address range!",
+	if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) {
+		dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!",
 				(unsigned long long)va_pages);
 		return -ENOMEM;
 	}
@@ -2044,6 +2499,13 @@
 #undef KBASE_MSG_PRE_FLAG
 	}
 
+	if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) &&
+	    (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) {
+		dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GPU_VA_SAME_4GB_PAGE and va_pages==%lld greater than that needed for 4GB space",
+				(unsigned long long)va_pages);
+		return -EINVAL;
+	}
+
 	return 0;
 #undef KBASE_MSG_PRE
 }
@@ -2284,6 +2746,7 @@
 
 int kbase_jit_init(struct kbase_context *kctx)
 {
+	mutex_lock(&kctx->jit_evict_lock);
 	INIT_LIST_HEAD(&kctx->jit_active_head);
 	INIT_LIST_HEAD(&kctx->jit_pool_head);
 	INIT_LIST_HEAD(&kctx->jit_destroy_head);
@@ -2291,49 +2754,268 @@
 
 	INIT_LIST_HEAD(&kctx->jit_pending_alloc);
 	INIT_LIST_HEAD(&kctx->jit_atoms_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	kctx->jit_max_allocations = 0;
+	kctx->jit_current_allocations = 0;
+	kctx->trim_level = 0;
 
 	return 0;
 }
 
+/* Check if the allocation from the JIT pool is of the same size as the new JIT
+ * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets
+ * the alignment requirements.
+ */
+static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx,
+	struct kbase_va_region *walker, struct base_jit_alloc_info *info)
+{
+	bool meet_reqs = true;
+
+	if (walker->nr_pages != info->va_pages)
+		meet_reqs = false;
+	else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) {
+		size_t align = info->extent;
+		size_t align_mask = align - 1;
+
+		if ((walker->start_pfn + info->commit_pages) & align_mask)
+			meet_reqs = false;
+	}
+
+	return meet_reqs;
+}
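
To make the BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP test above concrete, a small illustrative helper with fixed numbers follows (the values 512 and 128 are examples, not driver defaults).

/* Illustrative numbers: extent = 512 pages, commit_pages = 128, so
 * align_mask = 511 and a pooled region is reusable only when
 * (start_pfn + 128) is a multiple of 512, e.g. start_pfn 0x10180 passes
 * ((0x10180 + 128) % 512 == 0) while 0x10100 does not (remainder 384).
 */
static bool example_tiler_align_top_ok(u64 start_pfn)
{
	const size_t commit_pages = 128;	/* example value */
	const size_t align_mask = 512 - 1;	/* extent - 1 */

	return ((start_pfn + commit_pages) & align_mask) == 0;
}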
+
+static int kbase_jit_grow(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info, struct kbase_va_region *reg)
+{
+	size_t delta;
+	size_t pages_required;
+	size_t old_size;
+	struct kbase_mem_pool *pool;
+	int ret = -ENOMEM;
+	struct tagged_addr *gpu_pages;
+	struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
+	int i;
+
+	if (info->commit_pages > reg->nr_pages) {
+		/* Attempted to grow larger than maximum size */
+		return -EINVAL;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Make the physical backing no longer reclaimable */
+	if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+		goto update_failed;
+
+	if (reg->gpu_alloc->nents >= info->commit_pages)
+		goto done;
+
+	/* Grow the backing */
+	old_size = reg->gpu_alloc->nents;
+
+	/* Allocate some more pages */
+	delta = info->commit_pages - reg->gpu_alloc->nents;
+	pages_required = delta;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	/* Preallocate memory for the sub-allocation structs */
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+		prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]),
+				GFP_KERNEL);
+		if (!prealloc_sas[i])
+			goto update_failed;
+	}
+
+	if (pages_required >= (SZ_2M / SZ_4K)) {
+		pool = &kctx->lp_mem_pool;
+		/* Round up to number of 2 MB pages required */
+		pages_required += ((SZ_2M / SZ_4K) - 1);
+		pages_required /= (SZ_2M / SZ_4K);
+	} else {
+#endif
+		pool = &kctx->mem_pool;
+#ifdef CONFIG_MALI_2MB_ALLOC
+	}
+#endif
+
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		pages_required *= 2;
+
+	spin_lock(&kctx->mem_partials_lock);
+	kbase_mem_pool_lock(pool);
+
+	/* As we cannot allocate memory from the kernel with the vm_lock held,
+	 * grow the pool to the required size with the lock dropped. We hold the
+	 * pool lock to prevent another thread from allocating from the pool
+	 * between the grow and allocation.
+	 */
+	while (kbase_mem_pool_size(pool) < pages_required) {
+		int pool_delta = pages_required - kbase_mem_pool_size(pool);
+
+		kbase_mem_pool_unlock(pool);
+		spin_unlock(&kctx->mem_partials_lock);
+		kbase_gpu_vm_unlock(kctx);
+
+		if (kbase_mem_pool_grow(pool, pool_delta))
+			goto update_failed_unlocked;
+
+		kbase_gpu_vm_lock(kctx);
+		spin_lock(&kctx->mem_partials_lock);
+		kbase_mem_pool_lock(pool);
+	}
+
+	gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool,
+			delta, &prealloc_sas[0]);
+	if (!gpu_pages) {
+		kbase_mem_pool_unlock(pool);
+		spin_unlock(&kctx->mem_partials_lock);
+		goto update_failed;
+	}
+
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		struct tagged_addr *cpu_pages;
+
+		cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc,
+				pool, delta, &prealloc_sas[1]);
+		if (!cpu_pages) {
+			kbase_free_phy_pages_helper_locked(reg->gpu_alloc,
+					pool, gpu_pages, delta);
+			kbase_mem_pool_unlock(pool);
+			spin_unlock(&kctx->mem_partials_lock);
+			goto update_failed;
+		}
+	}
+	kbase_mem_pool_unlock(pool);
+	spin_unlock(&kctx->mem_partials_lock);
+
+	ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages,
+			old_size);
+	/*
+	 * The grow failed so put the allocation back in the
+	 * pool and return failure.
+	 */
+	if (ret)
+		goto update_failed;
+
+done:
+	ret = 0;
+
+	/* Update attributes of JIT allocation taken from the pool */
+	reg->initial_commit = info->commit_pages;
+	reg->extent = info->extent;
+
+update_failed:
+	kbase_gpu_vm_unlock(kctx);
+update_failed_unlocked:
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
+		kfree(prealloc_sas[i]);
+
+	return ret;
+}
+
 struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 		struct base_jit_alloc_info *info)
 {
 	struct kbase_va_region *reg = NULL;
-	struct kbase_va_region *walker;
-	struct kbase_va_region *temp;
-	size_t current_diff = SIZE_MAX;
 
-	int ret;
+	if (kctx->jit_current_allocations >= kctx->jit_max_allocations) {
+		/* Too many current allocations */
+		return NULL;
+	}
+	if (info->max_allocations > 0 &&
+			kctx->jit_current_allocations_per_bin[info->bin_id] >=
+			info->max_allocations) {
+		/* Too many current allocations in this bin */
+		return NULL;
+	}
 
 	mutex_lock(&kctx->jit_evict_lock);
+
 	/*
 	 * Scan the pool for an existing allocation which meets our
 	 * requirements and remove it.
 	 */
-	list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) {
+	if (info->usage_id != 0) {
+		/* First scan for an allocation with the same usage ID */
+		struct kbase_va_region *walker;
+		struct kbase_va_region *temp;
+		size_t current_diff = SIZE_MAX;
 
-		if (walker->nr_pages >= info->va_pages) {
-			size_t min_size, max_size, diff;
+		list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head,
+				jit_node) {
 
-			/*
-			 * The JIT allocations VA requirements have been
-			 * meet, it's suitable but other allocations
-			 * might be a better fit.
-			 */
-			min_size = min_t(size_t, walker->gpu_alloc->nents,
-					info->commit_pages);
-			max_size = max_t(size_t, walker->gpu_alloc->nents,
-					info->commit_pages);
-			diff = max_size - min_size;
+			if (walker->jit_usage_id == info->usage_id &&
+					walker->jit_bin_id == info->bin_id &&
+					meet_size_and_tiler_align_top_requirements(
+							kctx, walker, info)) {
+				size_t min_size, max_size, diff;
 
-			if (current_diff > diff) {
-				current_diff = diff;
-				reg = walker;
+				/*
+				 * The JIT allocation's VA requirements have
+				 * been met; it's suitable, but other
+				 * allocations might be a better fit.
+				 */
+				min_size = min_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				max_size = max_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				diff = max_size - min_size;
+
+				if (current_diff > diff) {
+					current_diff = diff;
+					reg = walker;
+				}
+
+				/* The allocation is an exact match */
+				if (current_diff == 0)
+					break;
 			}
+		}
+	}
 
-			/* The allocation is an exact match, stop looking */
-			if (current_diff == 0)
-				break;
+	if (!reg) {
+		/* No allocation with the same usage ID, or usage IDs not in
+		 * use. Search for an allocation we can reuse.
+		 */
+		struct kbase_va_region *walker;
+		struct kbase_va_region *temp;
+		size_t current_diff = SIZE_MAX;
+
+		list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head,
+				jit_node) {
+
+			if (walker->jit_bin_id == info->bin_id &&
+					meet_size_and_tiler_align_top_requirements(
+							kctx, walker, info)) {
+				size_t min_size, max_size, diff;
+
+				/*
+				 * The JIT allocation's VA requirements have
+				 * been met; it's suitable, but other
+				 * allocations might be a better fit.
+				 */
+				min_size = min_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				max_size = max_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				diff = max_size - min_size;
+
+				if (current_diff > diff) {
+					current_diff = diff;
+					reg = walker;
+				}
+
+				/* The allocation is an exact match, so stop
+				 * looking.
+				 */
+				if (current_diff == 0)
+					break;
+			}
 		}
 	}
 
@@ -2352,42 +3034,15 @@
 		list_del_init(&reg->gpu_alloc->evict_node);
 		mutex_unlock(&kctx->jit_evict_lock);
 
-		kbase_gpu_vm_lock(kctx);
-
-		/* Make the physical backing no longer reclaimable */
-		if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
-			goto update_failed;
-
-		/* Grow the backing if required */
-		if (reg->gpu_alloc->nents < info->commit_pages) {
-			size_t delta;
-			size_t old_size = reg->gpu_alloc->nents;
-
-			/* Allocate some more pages */
-			delta = info->commit_pages - reg->gpu_alloc->nents;
-			if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta)
-					!= 0)
-				goto update_failed;
-
-			if (reg->cpu_alloc != reg->gpu_alloc) {
-				if (kbase_alloc_phy_pages_helper(
-						reg->cpu_alloc, delta) != 0) {
-					kbase_free_phy_pages_helper(
-							reg->gpu_alloc, delta);
-					goto update_failed;
-				}
-			}
-
-			ret = kbase_mem_grow_gpu_mapping(kctx, reg,
-					info->commit_pages, old_size);
+		if (kbase_jit_grow(kctx, info, reg) < 0) {
 			/*
-			 * The grow failed so put the allocation back in the
-			 * pool and return failure.
+			 * An update to an allocation from the pool failed;
+			 * chances are slim a new allocation would fare any
+			 * better, so return the allocation to the pool and
+			 * return from the function with failure.
 			 */
-			if (ret)
-				goto update_failed;
+			goto update_failed_unlocked;
 		}
-		kbase_gpu_vm_unlock(kctx);
 	} else {
 		/* No suitable JIT allocation was found so create a new one */
 		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
@@ -2397,6 +3052,9 @@
 
 		mutex_unlock(&kctx->jit_evict_lock);
 
+		if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)
+			flags |= BASE_MEM_TILER_ALIGN_TOP;
+
 		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
 				info->extent, &flags, &gpu_addr);
 		if (!reg)
@@ -2409,15 +3067,15 @@
 		mutex_unlock(&kctx->jit_evict_lock);
 	}
 
+	kctx->jit_current_allocations++;
+	kctx->jit_current_allocations_per_bin[info->bin_id]++;
+
+	reg->jit_usage_id = info->usage_id;
+	reg->jit_bin_id = info->bin_id;
+
 	return reg;
 
-update_failed:
-	/*
-	 * An update to an allocation from the pool failed, chances
-	 * are slim a new allocation would fair any better so return
-	 * the allocation to the pool and return the function with failure.
-	 */
-	kbase_gpu_vm_unlock(kctx);
+update_failed_unlocked:
 	mutex_lock(&kctx->jit_evict_lock);
 	list_move(&reg->jit_node, &kctx->jit_pool_head);
 	mutex_unlock(&kctx->jit_evict_lock);
@@ -2427,19 +3085,62 @@
 
 void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
 {
-	/* The physical backing of memory in the pool is always reclaimable */
+	u64 old_pages;
+
+	/* Get current size of JIT region */
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (reg->initial_commit < old_pages) {
+		/* Free trim_level % of region, but don't go below initial
+		 * commit size
+		 */
+		u64 new_size = MAX(reg->initial_commit,
+			div_u64(old_pages * (100 - kctx->trim_level), 100));
+		u64 delta = old_pages - new_size;
+
+		if (delta) {
+			kbase_mem_shrink_cpu_mapping(kctx, reg, old_pages-delta,
+					old_pages);
+			kbase_mem_shrink_gpu_mapping(kctx, reg, old_pages-delta,
+					old_pages);
+
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+		}
+	}
+
+	kctx->jit_current_allocations--;
+	kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--;
+
+	kbase_mem_evictable_mark_reclaim(reg->gpu_alloc);
+
 	kbase_gpu_vm_lock(kctx);
-	kbase_mem_evictable_make(reg->gpu_alloc);
+	reg->flags |= KBASE_REG_DONT_NEED;
+	kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents);
 	kbase_gpu_vm_unlock(kctx);
 
+	/*
+	 * Add the allocation to the eviction list and the jit pool; after this
+	 * point the shrinker can reclaim it, or it may be reused.
+	 */
 	mutex_lock(&kctx->jit_evict_lock);
+
+	/* This allocation can't already be on a list. */
+	WARN_ON(!list_empty(&reg->gpu_alloc->evict_node));
+	list_add(&reg->gpu_alloc->evict_node, &kctx->evict_list);
+
 	list_move(&reg->jit_node, &kctx->jit_pool_head);
+
 	mutex_unlock(&kctx->jit_evict_lock);
 }
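
The trim computation in kbase_jit_free() above is easiest to see with concrete numbers; the figures below are purely illustrative.

/* Illustrative trim arithmetic for kbase_jit_free():
 *   trim_level = 25, initial_commit = 256, old_pages = 1024
 *     new_size = max(256, 1024 * (100 - 25) / 100) = max(256, 768) = 768
 *     delta    = 1024 - 768 = 256 pages shrunk from the CPU and GPU mappings
 *   With old_pages = 300 the max() clamps new_size to initial_commit (256),
 *   so only 44 pages are freed.
 */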
 
 void kbase_jit_backing_lost(struct kbase_va_region *reg)
 {
-	struct kbase_context *kctx = reg->kctx;
+	struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
+
+	if (WARN_ON(!kctx))
+		return;
 
 	lockdep_assert_held(&kctx->jit_evict_lock);
 
@@ -2472,6 +3173,7 @@
 		reg = list_entry(kctx->jit_pool_head.prev,
 				struct kbase_va_region, jit_node);
 		list_del(&reg->jit_node);
+		list_del_init(&reg->gpu_alloc->evict_node);
 	}
 	mutex_unlock(&kctx->jit_evict_lock);
 
@@ -2489,12 +3191,6 @@
 
 	/* Free all allocations for this context */
 
-	/*
-	 * Flush the freeing of allocations whose backing has been freed
-	 * (i.e. everything in jit_destroy_head).
-	 */
-	cancel_work_sync(&kctx->jit_work);
-
 	kbase_gpu_vm_lock(kctx);
 	mutex_lock(&kctx->jit_evict_lock);
 	/* Free all allocations from the pool */
@@ -2502,6 +3198,7 @@
 		walker = list_first_entry(&kctx->jit_pool_head,
 				struct kbase_va_region, jit_node);
 		list_del(&walker->jit_node);
+		list_del_init(&walker->gpu_alloc->evict_node);
 		mutex_unlock(&kctx->jit_evict_lock);
 		walker->flags &= ~KBASE_REG_JIT;
 		kbase_mem_free_region(kctx, walker);
@@ -2513,6 +3210,7 @@
 		walker = list_first_entry(&kctx->jit_active_head,
 				struct kbase_va_region, jit_node);
 		list_del(&walker->jit_node);
+		list_del_init(&walker->gpu_alloc->evict_node);
 		mutex_unlock(&kctx->jit_evict_lock);
 		walker->flags &= ~KBASE_REG_JIT;
 		kbase_mem_free_region(kctx, walker);
@@ -2520,6 +3218,12 @@
 	}
 	mutex_unlock(&kctx->jit_evict_lock);
 	kbase_gpu_vm_unlock(kctx);
+
+	/*
+	 * Flush the freeing of allocations whose backing has been freed
+	 * (i.e. everything in jit_destroy_head).
+	 */
+	cancel_work_sync(&kctx->jit_work);
 }
 
 static int kbase_jd_user_buf_map(struct kbase_context *kctx,
@@ -2610,9 +3314,9 @@
 		gwt_mask = ~KBASE_REG_GPU_WR;
 #endif
 
-	err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
-			kbase_reg_current_backed_size(reg),
-			reg->flags & gwt_mask);
+	err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+			pa, kbase_reg_current_backed_size(reg),
+			reg->flags & gwt_mask, kctx->as_nr);
 	if (err == 0)
 		return 0;
 
@@ -2670,6 +3374,7 @@
 	int err;
 	size_t count = 0;
 	struct kbase_mem_phy_alloc *alloc;
+	unsigned long gwt_mask = ~0;
 
 	alloc = reg->gpu_alloc;
 
@@ -2718,10 +3423,17 @@
 	/* Update nents as we now have pages to map */
 	alloc->nents = reg->nr_pages;
 
-	err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+#ifdef CONFIG_MALI_JOB_DUMP
+	if (kctx->gwt_enabled)
+		gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
+
+	err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
 			kbase_get_gpu_phy_pages(reg),
 			count,
-			reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+			(reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD) &
+			 gwt_mask,
+			kctx->as_nr);
 	if (err)
 		goto err_unmap_attachment;
 
@@ -2739,7 +3451,8 @@
 	return 0;
 
 err_teardown_orig_pages:
-	kbase_mmu_teardown_pages(kctx, reg->start_pfn, count);
+	kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+			count, kctx->as_nr);
 err_unmap_attachment:
 	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
 			alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
@@ -2784,9 +3497,6 @@
 		}
 	}
 	break;
-	case KBASE_MEM_TYPE_IMPORTED_UMP: {
-		break;
-	}
 #ifdef CONFIG_DMA_SHARED_BUFFER
 	case KBASE_MEM_TYPE_IMPORTED_UMM: {
 		reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
@@ -2822,9 +3532,11 @@
 				int err;
 
 				err = kbase_mmu_teardown_pages(
-						kctx,
+						kctx->kbdev,
+						&kctx->mmu,
 						reg->start_pfn,
-						alloc->nents);
+						alloc->nents,
+						kctx->as_nr);
 				WARN_ON(err);
 			}
 
@@ -2841,9 +3553,11 @@
 
 			if (reg && reg->gpu_alloc == alloc)
 				kbase_mmu_teardown_pages(
-						kctx,
+						kctx->kbdev,
+						&kctx->mmu,
 						reg->start_pfn,
-						kbase_reg_current_backed_size(reg));
+						kbase_reg_current_backed_size(reg),
+						kctx->as_nr);
 
 			if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
 				writeable = false;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h
index 36de381..901f1cf 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -35,9 +35,6 @@
 #endif
 
 #include <linux/kref.h>
-#ifdef CONFIG_UMP
-#include <linux/ump.h>
-#endif				/* CONFIG_UMP */
 #include "mali_base_kernel.h"
 #include <mali_kbase_hw.h>
 #include "mali_kbase_pm.h"
@@ -48,6 +45,9 @@
 /* Required for kbase_mem_evictable_unmake */
 #include "mali_kbase_mem_linux.h"
 
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx,
+		int pages);
+
 /* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
 #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2)	/* round to 4 pages */
 
@@ -77,11 +77,9 @@
 
 enum kbase_memory_type {
 	KBASE_MEM_TYPE_NATIVE,
-	KBASE_MEM_TYPE_IMPORTED_UMP,
 	KBASE_MEM_TYPE_IMPORTED_UMM,
 	KBASE_MEM_TYPE_IMPORTED_USER_BUF,
 	KBASE_MEM_TYPE_ALIAS,
-	KBASE_MEM_TYPE_TB,
 	KBASE_MEM_TYPE_RAW
 };
 
@@ -129,13 +127,13 @@
 	/* type of buffer */
 	enum kbase_memory_type type;
 
+	/* Kernel side mapping of the alloc */
+	struct kbase_vmap_struct *permanent_map;
+
 	unsigned long properties;
 
 	/* member in union valid based on @a type */
 	union {
-#ifdef CONFIG_UMP
-		ump_dd_handle ump_handle;
-#endif /* CONFIG_UMP */
 #if defined(CONFIG_DMA_SHARED_BUFFER)
 		struct {
 			struct dma_buf *dma_buf;
@@ -149,8 +147,13 @@
 			size_t nents;
 			struct kbase_aliased *aliased;
 		} alias;
-		/* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */
-		struct kbase_context *kctx;
+		struct {
+			struct kbase_context *kctx;
+			/* Number of pages in this structure, including *pages.
+			 * Used for kernel memory tracking.
+			 */
+			size_t nr_struct_pages;
+		} native;
 		struct kbase_alloc_import_user_buf {
 			unsigned long address;
 			unsigned long size;
@@ -204,8 +207,7 @@
  */
 static inline bool kbase_mem_is_imported(enum kbase_memory_type type)
 {
-	return (type == KBASE_MEM_TYPE_IMPORTED_UMP) ||
-		(type == KBASE_MEM_TYPE_IMPORTED_UMM) ||
+	return (type == KBASE_MEM_TYPE_IMPORTED_UMM) ||
 		(type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
 }
 
@@ -234,7 +236,7 @@
 	struct rb_node rblink;
 	struct list_head link;
 
-	struct kbase_context *kctx;	/* Backlink to base context */
+	struct rb_root *rbtree;	/* Backlink to rb tree */
 
 	u64 start_pfn;		/* The PFN in GPU space */
 	size_t nr_pages;
@@ -252,14 +254,18 @@
 #define KBASE_REG_GPU_NX            (1ul << 3)
 /* Is CPU cached? */
 #define KBASE_REG_CPU_CACHED        (1ul << 4)
-/* Is GPU cached? */
+/* Is GPU cached?
+ * Some components within the GPU might only be able to access memory that is
+ * GPU cacheable. Refer to the specific GPU implementation for more details.
+ */
 #define KBASE_REG_GPU_CACHED        (1ul << 5)
 
 #define KBASE_REG_GROWABLE          (1ul << 6)
 /* Can grow on pf? */
 #define KBASE_REG_PF_GROW           (1ul << 7)
 
-/* Bit 8 is unused */
+/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */
+#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8)
 
 /* inner shareable coherency */
 #define KBASE_REG_SHARE_IN          (1ul << 9)
@@ -299,30 +305,26 @@
 /* Memory is handled by JIT - user space should not be able to free it */
 #define KBASE_REG_JIT               (1ul << 24)
 
+/* Memory has permanent kernel side mapping */
+#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25)
+
 #define KBASE_REG_ZONE_SAME_VA      KBASE_REG_ZONE(0)
 
 /* only used with 32-bit clients */
 /*
- * On a 32bit platform, custom VA should be wired from (4GB + shader region)
+ * On a 32-bit platform, custom VA should be wired from 4GB
  * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
  * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
  * So we put the default limit to the maximum possible on Linux and shrink
  * it down, if required by the GPU, during initialization.
  */
 
-/*
- * Dedicated 16MB region for shader code:
- * VA range 0x101000000-0x102000000
- */
-#define KBASE_REG_ZONE_EXEC         KBASE_REG_ZONE(1)
-#define KBASE_REG_ZONE_EXEC_BASE    (0x101000000ULL >> PAGE_SHIFT)
-#define KBASE_REG_ZONE_EXEC_SIZE    ((16ULL * 1024 * 1024) >> PAGE_SHIFT)
-
-#define KBASE_REG_ZONE_CUSTOM_VA         KBASE_REG_ZONE(2)
-#define KBASE_REG_ZONE_CUSTOM_VA_BASE    (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */
+#define KBASE_REG_ZONE_CUSTOM_VA         KBASE_REG_ZONE(1)
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE    (0x100000000ULL >> PAGE_SHIFT)
 #define KBASE_REG_ZONE_CUSTOM_VA_SIZE    (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
 /* end 32-bit clients only */
 
+
 	unsigned long flags;
 
 	size_t extent; /* nr of pages alloc'd on PF */
@@ -332,6 +334,10 @@
 
 	/* List head used to store the region in the JIT allocation pool */
 	struct list_head jit_node;
+	/* The last JIT usage ID for this region */
+	u16 jit_usage_id;
+	/* The JIT bin this allocation came from */
+	u8 jit_bin_id;
 };
 
 /* Common functions */
@@ -373,7 +379,9 @@
 
 #define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
 
-static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(
+		struct kbase_context *kctx, size_t nr_pages,
+		enum kbase_memory_type type)
 {
 	struct kbase_mem_phy_alloc *alloc;
 	size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
@@ -403,6 +411,13 @@
 	if (!alloc)
 		return ERR_PTR(-ENOMEM);
 
+	if (type == KBASE_MEM_TYPE_NATIVE) {
+		alloc->imported.native.nr_struct_pages =
+				(alloc_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+		kbase_process_page_usage_inc(kctx,
+				alloc->imported.native.nr_struct_pages);
+	}
+
 	/* Store allocation method */
 	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
 		alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
@@ -429,29 +444,38 @@
 	KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
 	KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
 
-	reg->cpu_alloc = kbase_alloc_create(reg->nr_pages,
+	reg->cpu_alloc = kbase_alloc_create(kctx, reg->nr_pages,
 			KBASE_MEM_TYPE_NATIVE);
 	if (IS_ERR(reg->cpu_alloc))
 		return PTR_ERR(reg->cpu_alloc);
 	else if (!reg->cpu_alloc)
 		return -ENOMEM;
-	reg->cpu_alloc->imported.kctx = kctx;
-	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
+
+	reg->cpu_alloc->imported.native.kctx = kctx;
 	if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE)
 	    && (reg->flags & KBASE_REG_CPU_CACHED)) {
-		reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
+		reg->gpu_alloc = kbase_alloc_create(kctx, reg->nr_pages,
 				KBASE_MEM_TYPE_NATIVE);
-		reg->gpu_alloc->imported.kctx = kctx;
-		INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
+		if (IS_ERR_OR_NULL(reg->gpu_alloc)) {
+			kbase_mem_phy_alloc_put(reg->cpu_alloc);
+			return -ENOMEM;
+		}
+		reg->gpu_alloc->imported.native.kctx = kctx;
 	} else {
 		reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
 	}
 
+	mutex_lock(&kctx->jit_evict_lock);
+	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
+	INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
+	mutex_unlock(&kctx->jit_evict_lock);
+
 	reg->flags &= ~KBASE_REG_FREE;
+
 	return 0;
 }
 
-static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages)
+static inline u32 kbase_atomic_add_pages(u32 num_pages, atomic_t *used_pages)
 {
 	int new_val = atomic_add_return(num_pages, used_pages);
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
@@ -460,7 +484,7 @@
 	return new_val;
 }
 
-static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages)
+static inline u32 kbase_atomic_sub_pages(u32 num_pages, atomic_t *used_pages)
 {
 	int new_val = atomic_sub_return(num_pages, used_pages);
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
@@ -539,10 +563,26 @@
  * 3. Return NULL if no memory in the pool
  *
  * Return: Pointer to allocated page, or NULL if allocation failed.
+ *
+ * Note: This function should not be used if the pool lock is held. Use
+ * kbase_mem_pool_alloc_locked() instead.
  */
 struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool);
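
A minimal usage sketch of the single-page pool API declared above (illustrative only; whether the freed page is marked dirty depends on what the caller did with it, and the example_* names are hypothetical).

/* Illustrative only: borrow one page from a pool and give it back.
 * kbase_mem_pool_alloc() returns NULL when the pool has no free pages.
 */
static struct page *example_pool_get(struct kbase_mem_pool *pool)
{
	return kbase_mem_pool_alloc(pool);
}

static void example_pool_put(struct kbase_mem_pool *pool, struct page *p)
{
	kbase_mem_pool_free(pool, p, false /* not dirty */);
}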
 
 /**
+ * kbase_mem_pool_alloc_locked - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * If there are free pages in the pool, this function allocates a page from
+ * @pool. This function does not use @next_pool.
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ *
+ * Note: Caller must hold the pool lock.
+ */
+struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool);
+
+/**
  * kbase_mem_pool_free - Free a page to memory pool
  * @pool:  Memory pool where page should be freed
  * @page:  Page to free to the pool
@@ -553,11 +593,28 @@
  * 2. Otherwise, if @next_pool is not NULL and not full, add @page to
  *    @next_pool.
  * 3. Finally, free @page to the kernel.
+ *
+ * Note: This function should not be used if the pool lock is held. Use
+ * kbase_mem_pool_free_locked() instead.
  */
 void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page,
 		bool dirty);
 
 /**
+ * kbase_mem_pool_free_locked - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @p:     Page to free to the pool
+ * @dirty: Whether the page may be dirty in the cache.
+ *
+ * If @pool is not full, this function adds @p to @pool. Otherwise, @p is
+ * freed to the kernel. This function does not use @next_pool.
+ *
+ * Note: Caller must hold the pool lock.
+ */
+void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty);
+
+/**
  * kbase_mem_pool_alloc_pages - Allocate pages from memory pool
  * @pool:     Memory pool to allocate from
  * @nr_pages: Number of pages to allocate
@@ -571,11 +628,58 @@
  * On success number of pages allocated (could be less than nr_pages if
  * partial_allowed).
  * On error an error code.
+ *
+ * Note: This function should not be used if the pool lock is held. Use
+ * kbase_mem_pool_alloc_pages_locked() instead.
+ *
+ * The caller must not hold vm_lock, as this could cause a deadlock if
+ * the kernel OoM killer runs. If the caller must allocate pages while holding
+ * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
  */
 int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
 		struct tagged_addr *pages, bool partial_allowed);
 
 /**
+ * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool
+ * @pool:        Memory pool to allocate from
+ * @nr_4k_pages: Number of pages to allocate
+ * @pages:       Pointer to array where the physical address of the allocated
+ *               pages will be stored.
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages. This
+ * version does not allocate new pages from the kernel, and therefore will
+ * never trigger the OoM killer, so it can be run while the vm_lock is held.
+ *
+ * As new pages cannot be allocated, the caller must ensure there are
+ * sufficient pages in the pool. Usage of this function should look like:
+ *
+ *   kbase_gpu_vm_lock(kctx);
+ *   kbase_mem_pool_lock(pool)
+ *   while (kbase_mem_pool_size(pool) < pages_required) {
+ *     kbase_mem_pool_unlock(pool)
+ *     kbase_gpu_vm_unlock(kctx);
+ *     kbase_mem_pool_grow(pool)
+ *     kbase_gpu_vm_lock(kctx);
+ *     kbase_mem_pool_lock(pool)
+ *   }
+ *   kbase_mem_pool_alloc_pages_locked(pool)
+ *   kbase_mem_pool_unlock(pool)
+ *   Perform other processing that requires vm_lock...
+ *   kbase_gpu_vm_unlock(kctx);
+ *
+ * This ensures that the pool can be grown to the required size and that the
+ * allocation can complete without another thread using the newly grown pages.
+ *
+ * Return:
+ * On success number of pages allocated.
+ * On error an error code.
+ *
+ * Note: Caller must hold the pool lock.
+ */
+int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_4k_pages, struct tagged_addr *pages);
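A minimal C sketch of the pattern above, assuming kbase_mem_pool_grow() takes the number of pages to grow by (its prototype is not shown in this hunk); pages_required, pages and err are illustrative caller-side variables:

	kbase_gpu_vm_lock(kctx);
	kbase_mem_pool_lock(pool);
	while (kbase_mem_pool_size(pool) < pages_required) {
		size_t shortfall = pages_required - kbase_mem_pool_size(pool);

		/* Drop both locks before growing: growing allocates from the
		 * kernel and may therefore sleep or trigger the OoM killer.
		 */
		kbase_mem_pool_unlock(pool);
		kbase_gpu_vm_unlock(kctx);
		kbase_mem_pool_grow(pool, shortfall);	/* assumed signature */
		kbase_gpu_vm_lock(kctx);
		kbase_mem_pool_lock(pool);
	}
	/* Both locks held: no other thread can consume the grown pages */
	err = kbase_mem_pool_alloc_pages_locked(pool, pages_required, pages);
	kbase_mem_pool_unlock(pool);
	/* ... other processing that requires vm_lock ... */
	kbase_gpu_vm_unlock(kctx);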
+
+/**
  * kbase_mem_pool_free_pages - Free pages to memory pool
  * @pool:     Memory pool where pages should be freed
  * @nr_pages: Number of pages to free
@@ -591,6 +695,22 @@
 		struct tagged_addr *pages, bool dirty, bool reclaimed);
 
 /**
+ * kbase_mem_pool_free_pages_locked - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages were reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_pages, struct tagged_addr *pages, bool dirty,
+		bool reclaimed);
+
+/**
  * kbase_mem_pool_size - Get number of free pages in memory pool
  * @pool:  Memory pool to inspect
  *
@@ -600,7 +720,7 @@
  */
 static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool)
 {
-	return ACCESS_ONCE(pool->cur_size);
+	return READ_ONCE(pool->cur_size);
 }
 
 /**
@@ -649,6 +769,15 @@
 void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
 
 /**
+ * kbase_mem_pool_mark_dying - Mark that this pool is dying
+ * @pool:     Memory pool
+ *
+ * This will cause any ongoing allocation operations (e.g. growing on page fault)
+ * to be terminated.
+ */
+void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool);
+
+/**
  * kbase_mem_alloc_page - Allocate a new page for a device
  * @pool:  Memory pool to allocate a page from
  *
@@ -660,21 +789,43 @@
 struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool);
 
 int kbase_region_tracker_init(struct kbase_context *kctx);
-int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages);
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
+		u8 max_allocations, u8 trim_level);
 void kbase_region_tracker_term(struct kbase_context *kctx);
 
-struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
+/**
+ * kbase_region_tracker_term_rbtree - Free memory for a region tracker
+ * @rbtree: Region tracker tree root
+ *
+ * This will free all the regions within the region tracker.
+ */
+void kbase_region_tracker_term_rbtree(struct rb_root *rbtree);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(
+		struct kbase_context *kctx, u64 gpu_addr);
+struct kbase_va_region *kbase_find_region_enclosing_address(
+		struct rb_root *rbtree, u64 gpu_addr);
 
 /**
  * @brief Check that a pointer is actually a valid region.
  *
  * Must be called with context lock held.
  */
-struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr);
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(
+		struct kbase_context *kctx, u64 gpu_addr);
+struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree,
+		u64 gpu_addr);
 
-struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone);
+struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
+		u64 start_pfn, size_t nr_pages, int zone);
 void kbase_free_alloced_region(struct kbase_va_region *reg);
-int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg,
+		u64 addr, size_t nr_pages, size_t align);
+int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
+		struct kbase_va_region *reg, u64 addr, size_t nr_pages,
+		size_t align);
+int kbase_remove_va_region(struct kbase_va_region *reg);
 
 bool kbase_check_alloc_flags(unsigned long flags);
 bool kbase_check_import_flags(unsigned long flags);
@@ -718,25 +869,44 @@
 
 int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
 
-int kbase_mmu_init(struct kbase_context *kctx);
-void kbase_mmu_term(struct kbase_context *kctx);
+/**
+ * kbase_mmu_init - Initialise an object representing GPU page tables
+ * @kbdev: kbase device
+ * @mmut:  structure to initialise
+ * @kctx:  optional kbase context, may be NULL if this set of MMU tables is not
+ *         associated with a context
+ *
+ * The structure should be terminated using kbase_mmu_term().
+ */
+int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+		struct kbase_context *kctx);
+/**
+ * kbase_mmu_term - Terminate an object representing GPU page tables
+ * @kbdev: kbase device
+ * @mmut:  kbase_mmu_table to be destroyed
+ *
+ * This will free any page tables that have been allocated.
+ */
+void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut);
 
-phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx);
-void kbase_mmu_free_pgd(struct kbase_context *kctx);
-int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
-				  struct tagged_addr *phys, size_t nr,
-				  unsigned long flags);
-int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
-				  struct tagged_addr *phys, size_t nr,
-				  unsigned long flags);
+int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
+				    struct kbase_mmu_table *mmut,
+				    const u64 start_vpfn,
+				    struct tagged_addr *phys, size_t nr,
+				    unsigned long flags);
+int kbase_mmu_insert_pages(struct kbase_device *kbdev,
+			   struct kbase_mmu_table *mmut, u64 vpfn,
+			   struct tagged_addr *phys, size_t nr,
+			   unsigned long flags, int as_nr);
 int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
 					struct tagged_addr phys, size_t nr,
 					unsigned long flags);
 
-int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr);
-int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
-					struct tagged_addr *phys, size_t nr,
-					unsigned long flags);
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
+			     struct kbase_mmu_table *mmut, u64 vpfn,
+			     size_t nr, int as_nr);
 int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
 			   struct tagged_addr *phys, size_t nr,
 			   unsigned long flags);
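The reworked prototypes above decouple the page-table helpers from a single context: callers now pass the kbase_device, a kbase_mmu_table and, where a GPU flush is needed, an address space number. A minimal sketch of the resulting lifecycle, with kbdev, vpfn, phys, nr, flags and as_nr as illustrative caller-supplied values:

	struct kbase_mmu_table mmut;
	int err;

	/* NULL context: this set of page tables is not tied to a kctx */
	err = kbase_mmu_init(kbdev, &mmut, NULL);
	if (err)
		return err;

	err = kbase_mmu_insert_pages(kbdev, &mmut, vpfn, phys, nr,
				     flags, as_nr);
	if (!err)
		kbase_mmu_teardown_pages(kbdev, &mmut, vpfn, nr, as_nr);

	/* Frees any page tables that were allocated for mmut */
	kbase_mmu_term(kbdev, &mmut);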
@@ -756,11 +926,19 @@
 int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
 
 /**
+ * kbase_mmu_update - Configure an address space on the GPU to the specified
+ *                    MMU tables
+ *
  * The caller has the following locking conditions:
  * - It must hold kbase_device->mmu_hw_mutex
  * - It must hold the hwaccess_lock
+ *
+ * @kbdev: Kbase device structure
+ * @mmut:  The set of MMU tables to be configured on the address space
+ * @as_nr: The address space to be configured
  */
-void kbase_mmu_update(struct kbase_context *kctx);
+void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+		int as_nr);
 
 /**
  * kbase_mmu_disable() - Disable the MMU for a previously active kbase context.
@@ -922,16 +1100,74 @@
 void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
 
 /**
-* @brief Allocates physical pages.
-*
-* Allocates \a nr_pages_requested and updates the alloc object.
-*
-* @param[in] alloc allocation object to add pages to
-* @param[in] nr_pages_requested number of physical pages to allocate
-*
-* @return 0 if all pages have been successfully allocated. Error code otherwise
-*/
-int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested);
+ * kbase_alloc_phy_pages_helper - Allocates physical pages.
+ * @alloc:              allocation object to add pages to
+ * @nr_pages_requested: number of physical pages to allocate
+ *
+ * Allocates @nr_pages_requested and updates the alloc object.
+ *
+ * Return: 0 if all pages have been successfully allocated. Error code otherwise.
+ *
+ * Note: The caller must not hold vm_lock, as this could cause a deadlock if
+ * the kernel OoM killer runs. If the caller must allocate pages while holding
+ * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
+ *
+ * This function cannot be used from interrupt context
+ */
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
+		size_t nr_pages_requested);
+
+/**
+ * kbase_alloc_phy_pages_helper_locked - Allocates physical pages.
+ * @alloc:              allocation object to add pages to
+ * @pool:               Memory pool to allocate from
+ * @nr_pages_requested: number of physical pages to allocate
+ * @prealloc_sa:        Information about the partial allocation if the amount
+ *                      of memory requested is not a multiple of 2MB. One
+ *                      instance of struct kbase_sub_alloc must be allocated by
+ *                      the caller iff CONFIG_MALI_2MB_ALLOC is enabled.
+ *
+ * Allocates @nr_pages_requested and updates the alloc object. This function
+ * does not allocate new pages from the kernel, and therefore will never
+ * trigger the OoM killer, so it can be run while the vm_lock is held.
+ *
+ * As new pages cannot be allocated, the caller must ensure there are
+ * sufficient pages in the pool. Usage of this function should look like:
+ *
+ *   kbase_gpu_vm_lock(kctx);
+ *   kbase_mem_pool_lock(pool)
+ *   while (kbase_mem_pool_size(pool) < pages_required) {
+ *     kbase_mem_pool_unlock(pool)
+ *     kbase_gpu_vm_unlock(kctx);
+ *     kbase_mem_pool_grow(pool)
+ *     kbase_gpu_vm_lock(kctx);
+ *     kbase_mem_pool_lock(pool)
+ *   }
+ *   kbase_alloc_phy_pages_helper_locked(pool)
+ *   kbase_mem_pool_unlock(pool)
+ *   Perform other processing that requires vm_lock...
+ *   kbase_gpu_vm_unlock(kctx);
+ *
+ * This ensures that the pool can be grown to the required size and that the
+ * allocation can complete without another thread using the newly grown pages.
+ *
+ * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then
+ * @pool must be alloc->imported.native.kctx->lp_mem_pool. Otherwise it must be
+ * alloc->imported.native.kctx->mem_pool.
+ * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be
+ * pre-allocated by the caller because kmalloc() may sleep, and sleeping is
+ * not allowed whilst holding pool->pool_lock.
+ * @prealloc_sa is set to NULL by this function if it has been consumed, to
+ * indicate that the caller must not free it.
+ *
+ * Return: Pointer to the array of allocated pages, or NULL on failure.
+ *
+ * Note: Caller must hold pool->pool_lock.
+ */
+struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
+		struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool,
+		size_t nr_pages_requested,
+		struct kbase_sub_alloc **prealloc_sa);
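A short sketch of the @prealloc_sa contract described above, assuming CONFIG_MALI_2MB_ALLOC is enabled and that the pool has already been grown as shown for kbase_mem_pool_alloc_pages_locked(); alloc, pool and nr_pages are illustrative:

	struct kbase_sub_alloc *prealloc_sa = NULL;
	struct tagged_addr *new_pages;

	/* Pre-allocate outside the pool lock: kmalloc() may sleep */
	prealloc_sa = kmalloc(sizeof(*prealloc_sa), GFP_KERNEL);
	if (!prealloc_sa)
		return -ENOMEM;

	kbase_mem_pool_lock(pool);
	new_pages = kbase_alloc_phy_pages_helper_locked(alloc, pool,
			nr_pages, &prealloc_sa);
	kbase_mem_pool_unlock(pool);

	/* The helper sets prealloc_sa to NULL if it consumed it,
	 * so kfree() is safe either way.
	 */
	kfree(prealloc_sa);

	if (!new_pages)
		return -ENOMEM;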
 
 /**
 * @brief Free physical pages.
@@ -943,6 +1179,26 @@
 */
 int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
 
+/**
+ * kbase_free_phy_pages_helper_locked - Free pages allocated with
+ *                                      kbase_alloc_phy_pages_helper_locked()
+ * @alloc:            Allocation object to free pages from
+ * @pool:             Memory pool to return freed pages to
+ * @pages:            Pages allocated by kbase_alloc_phy_pages_helper_locked()
+ * @nr_pages_to_free: Number of physical pages to free
+ *
+ * This function atomically frees pages allocated with
+ * kbase_alloc_phy_pages_helper_locked(). @pages is the pointer to the page
+ * array that is returned by that function. @pool must be the pool that the
+ * pages were originally allocated from.
+ *
+ * If the mem_pool has been unlocked since the allocation then
+ * kbase_free_phy_pages_helper() should be used instead.
+ */
+void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
+		struct kbase_mem_pool *pool, struct tagged_addr *pages,
+		size_t nr_pages_to_free);
+
 static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
 {
 	SetPagePrivate(p);
@@ -1150,4 +1406,29 @@
  */
 void kbase_sticky_resource_term(struct kbase_context *kctx);
 
+/**
+ * kbase_mem_pool_lock - Lock a memory pool
+ * @pool: Memory pool to lock
+ */
+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
+{
+	spin_lock(&pool->pool_lock);
+}
+
+/**
+ * kbase_mem_pool_unlock - Release a memory pool
+ * @pool: Memory pool to unlock
+ */
+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
+{
+	spin_unlock(&pool->pool_lock);
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc);
+
 #endif				/* _KBASE_MEM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
index 4e6668e..bc95a0ff 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -47,44 +47,185 @@
 
 #include <mali_kbase.h>
 #include <mali_kbase_mem_linux.h>
-#include <mali_kbase_config_defaults.h>
 #include <mali_kbase_tlstream.h>
 #include <mali_kbase_ioctl.h>
 
+
+static int kbase_vmap_phy_pages(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 offset_bytes, size_t size,
+		struct kbase_vmap_struct *map);
+static void kbase_vunmap_phy_pages(struct kbase_context *kctx,
+		struct kbase_vmap_struct *map);
+
 static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
 
-/**
- * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
- * @kctx:      Context the region belongs to
- * @reg:       The GPU region
- * @new_pages: The number of pages after the shrink
- * @old_pages: The number of pages before the shrink
- *
- * Shrink (or completely remove) all CPU mappings which reference the shrunk
- * part of the allocation.
- *
- * Note: Caller must be holding the processes mmap_sem lock.
+/* Retrieve the associated region pointer if the GPU address corresponds to
+ * one of the event memory pages. The enclosing region, if found, shouldn't
+ * have been marked as free.
  */
-static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
-		struct kbase_va_region *reg,
-		u64 new_pages, u64 old_pages);
+static struct kbase_va_region *kbase_find_event_mem_region(
+			struct kbase_context *kctx, u64 gpu_addr)
+{
+	return NULL;
+}
 
 /**
- * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
- * @kctx:      Context the region belongs to
- * @reg:       The GPU region or NULL if there isn't one
- * @new_pages: The number of pages after the shrink
- * @old_pages: The number of pages before the shrink
+ * kbase_phy_alloc_mapping_init - Initialize the kernel side permanent mapping
+ *                                of the physical allocation belonging to a
+ *                                region
+ * @kctx:  The kernel base context @reg belongs to.
+ * @reg:   The region whose physical allocation is to be mapped
+ * @vsize: The size of the requested region, in pages
+ * @size:  The size in pages initially committed to the region
  *
- * Return: 0 on success, negative -errno on error
+ * Return: 0 on success, otherwise an error code indicating failure
  *
- * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
- * itself is unmodified as we still need to reserve the VA, only the page tables
- * will be modified by this function.
+ * Maps the physical allocation backing a non-free @reg, so it may be
+ * accessed directly from the kernel. This is only supported for physical
+ * allocations of type KBASE_MEM_TYPE_NATIVE, and will fail for other types of
+ * physical allocation.
+ *
+ * The mapping is stored directly in the allocation that backs @reg. The
+ * refcount is not incremented at this point. Instead, use of the mapping should
+ * be surrounded by kbase_phy_alloc_mapping_get() and
+ * kbase_phy_alloc_mapping_put() to ensure it does not disappear whilst the
+ * client is accessing it.
+ *
+ * Both cached and uncached regions are allowed, but any sync operations are the
+ * responsibility of the client using the permanent mapping.
+ *
+ * A number of checks are made to ensure that a region that needs a permanent
+ * mapping can actually be supported:
+ * - The region must be created as fully backed
+ * - The region must not be growable
+ *
+ * This function will fail if those checks are not satisfied.
+ *
+ * On success, the region will also be modified as follows:
+ * - It will no longer be growable
  */
-static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
-		struct kbase_va_region *reg,
-		u64 new_pages, u64 old_pages);
+static int kbase_phy_alloc_mapping_init(struct kbase_context *kctx,
+		struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+	size_t size_bytes = (size << PAGE_SHIFT);
+	struct kbase_vmap_struct *kern_mapping;
+	int err = 0;
+
+	/* Can only map in regions that are always fully committed
+	 * Don't setup the mapping twice
+	 * Only support KBASE_MEM_TYPE_NATIVE allocations
+	 */
+	if (vsize != size || reg->cpu_alloc->permanent_map != NULL ||
+			reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+		return -EINVAL;
+
+	if (size > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES -
+			kctx->permanent_mapped_pages)) {
+		dev_warn(kctx->kbdev->dev, "Request for %llu more pages of memory needing a permanent mapping would breach limit %lu, currently at %lu pages",
+				(u64)size,
+				KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES,
+				kctx->permanent_mapped_pages);
+		return -ENOMEM;
+	}
+
+	kern_mapping = kzalloc(sizeof(*kern_mapping), GFP_KERNEL);
+	if (!kern_mapping)
+		return -ENOMEM;
+
+	err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping);
+	if (err < 0)
+		goto vmap_fail;
+
+	/* No support for growing or shrinking mapped regions */
+	reg->flags &= ~KBASE_REG_GROWABLE;
+
+	reg->cpu_alloc->permanent_map = kern_mapping;
+	kctx->permanent_mapped_pages += size;
+
+	return 0;
+vmap_fail:
+	kfree(kern_mapping);
+	return err;
+}
+
+void kbase_phy_alloc_mapping_term(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc)
+{
+	WARN_ON(!alloc->permanent_map);
+	kbase_vunmap_phy_pages(kctx, alloc->permanent_map);
+	kfree(alloc->permanent_map);
+
+	alloc->permanent_map = NULL;
+
+	/* Mappings are only done on cpu_alloc, so there is no need to worry
+	 * about the count being reduced a second time if a separate gpu_alloc
+	 * is freed
+	 */
+	WARN_ON(alloc->nents > kctx->permanent_mapped_pages);
+	kctx->permanent_mapped_pages -= alloc->nents;
+}
+
+void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx,
+		u64 gpu_addr,
+		struct kbase_vmap_struct **out_kern_mapping)
+{
+	struct kbase_va_region *reg;
+	void *kern_mem_ptr = NULL;
+	struct kbase_vmap_struct *kern_mapping;
+	u64 mapping_offset;
+
+	WARN_ON(!kctx);
+	WARN_ON(!out_kern_mapping);
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* First do a quick lookup in the list of event memory regions */
+	reg = kbase_find_event_mem_region(kctx, gpu_addr);
+
+	if (!reg) {
+		reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx, gpu_addr);
+	}
+
+	if (reg == NULL || (reg->flags & KBASE_REG_FREE) != 0)
+		goto out_unlock;
+
+	kern_mapping = reg->cpu_alloc->permanent_map;
+	if (kern_mapping == NULL)
+		goto out_unlock;
+
+	mapping_offset = gpu_addr - (reg->start_pfn << PAGE_SHIFT);
+
+	/* Refcount the allocations to prevent them disappearing */
+	WARN_ON(reg->cpu_alloc != kern_mapping->cpu_alloc);
+	WARN_ON(reg->gpu_alloc != kern_mapping->gpu_alloc);
+	(void)kbase_mem_phy_alloc_get(kern_mapping->cpu_alloc);
+	(void)kbase_mem_phy_alloc_get(kern_mapping->gpu_alloc);
+
+	kern_mem_ptr = (void *)(uintptr_t)((uintptr_t)kern_mapping->addr + mapping_offset);
+	*out_kern_mapping = kern_mapping;
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return kern_mem_ptr;
+}
+
+void kbase_phy_alloc_mapping_put(struct kbase_context *kctx,
+		struct kbase_vmap_struct *kern_mapping)
+{
+	WARN_ON(!kctx);
+	WARN_ON(!kern_mapping);
+
+	WARN_ON(kctx != kern_mapping->cpu_alloc->imported.native.kctx);
+	WARN_ON(kern_mapping != kern_mapping->cpu_alloc->permanent_map);
+
+	kbase_mem_phy_alloc_put(kern_mapping->cpu_alloc);
+	kbase_mem_phy_alloc_put(kern_mapping->gpu_alloc);
+
+	/* kern_mapping and the gpu/cpu phy allocs backing it must not be used
+	 * from now on
+	 */
+}
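A brief usage sketch for the two functions above: look up the permanent kernel mapping behind a GPU address, access it, then drop the references taken on the backing allocations; gpu_addr and len are illustrative:

	struct kbase_vmap_struct *kern_mapping;
	void *cpu_ptr;

	cpu_ptr = kbase_phy_alloc_mapping_get(kctx, gpu_addr, &kern_mapping);
	if (cpu_ptr) {
		/* Caller handles any CPU/GPU cache maintenance required */
		memset(cpu_ptr, 0, len);
		kbase_phy_alloc_mapping_put(kctx, kern_mapping);
		/* cpu_ptr and kern_mapping must not be used after the put */
	}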
 
 struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
 		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
@@ -92,6 +233,7 @@
 {
 	int zone;
 	struct kbase_va_region *reg;
+	struct rb_root *rbtree;
 	struct device *dev;
 
 	KBASE_DEBUG_ASSERT(kctx);
@@ -108,6 +250,11 @@
 		goto bad_flags;
 	}
 
+	if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 &&
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) {
+		/* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED;
+	}
 	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
 			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
 		dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
@@ -123,14 +270,15 @@
 		goto bad_sizes;
 
 	/* find out which VA zone to use */
-	if (*flags & BASE_MEM_SAME_VA)
+	if (*flags & BASE_MEM_SAME_VA) {
+		rbtree = &kctx->reg_rbtree_same;
 		zone = KBASE_REG_ZONE_SAME_VA;
-	else if (*flags & BASE_MEM_PROT_GPU_EX)
-		zone = KBASE_REG_ZONE_EXEC;
-	else
+	} else {
+		rbtree = &kctx->reg_rbtree_custom;
 		zone = KBASE_REG_ZONE_CUSTOM_VA;
+	}
 
-	reg = kbase_alloc_free_region(kctx, 0, va_pages, zone);
+	reg = kbase_alloc_free_region(rbtree, 0, va_pages, zone);
 	if (!reg) {
 		dev_err(dev, "Failed to allocate free region");
 		goto no_region;
@@ -162,6 +310,21 @@
 
 	kbase_gpu_vm_lock(kctx);
 
+	if (reg->flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) {
+		/* Permanent kernel mappings must happen as soon as
+		 * reg->cpu_alloc->pages is ready. Currently this happens after
+		 * kbase_alloc_phy_pages(). If we move that to setup pages
+		 * earlier, also move this call too
+		 */
+		int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages,
+				commit_pages);
+		if (err < 0) {
+			kbase_gpu_vm_unlock(kctx);
+			goto no_kern_mapping;
+		}
+	}
+
 	/* mmap needed to setup VA? */
 	if (*flags & BASE_MEM_SAME_VA) {
 		unsigned long prot = PROT_NONE;
@@ -230,6 +393,7 @@
 
 no_mmap:
 no_cookie:
+no_kern_mapping:
 no_mem:
 	kbase_mem_phy_alloc_put(reg->cpu_alloc);
 	kbase_mem_phy_alloc_put(reg->gpu_alloc);
@@ -243,7 +407,8 @@
 }
 KBASE_EXPORT_TEST_API(kbase_mem_alloc);
 
-int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out)
+int kbase_mem_query(struct kbase_context *kctx,
+		u64 gpu_addr, u64 query, u64 * const out)
 {
 	struct kbase_va_region *reg;
 	int ret = -EINVAL;
@@ -311,6 +476,10 @@
 		}
 		if (KBASE_REG_TILER_ALIGN_TOP & reg->flags)
 			*out |= BASE_MEM_TILER_ALIGN_TOP;
+		if (!(KBASE_REG_GPU_CACHED & reg->flags))
+			*out |= BASE_MEM_UNCACHED_GPU;
+		if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags)
+			*out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE;
 
 		WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE,
 				"BASE_MEM_FLAGS_QUERYABLE needs updating\n");
@@ -471,9 +640,9 @@
  * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
  * @alloc: The physical allocation
  */
-static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
+void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
 {
-	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_context *kctx = alloc->imported.native.kctx;
 	int __maybe_unused new_page_count;
 
 	kbase_process_page_usage_dec(kctx, alloc->nents);
@@ -493,7 +662,7 @@
 static
 void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
 {
-	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_context *kctx = alloc->imported.native.kctx;
 	int __maybe_unused new_page_count;
 
 	new_page_count = kbase_atomic_add_pages(alloc->nents,
@@ -512,21 +681,21 @@
 
 int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
 {
-	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	struct kbase_context *kctx = gpu_alloc->imported.native.kctx;
 
 	lockdep_assert_held(&kctx->reg_lock);
 
-	/* This alloction can't already be on a list. */
-	WARN_ON(!list_empty(&gpu_alloc->evict_node));
-
 	kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
 			0, gpu_alloc->nents);
 
+	mutex_lock(&kctx->jit_evict_lock);
+	/* This allocation can't already be on a list. */
+	WARN_ON(!list_empty(&gpu_alloc->evict_node));
+
 	/*
 	 * Add the allocation to the eviction list, after this point the shrink
 	 * can reclaim it.
 	 */
-	mutex_lock(&kctx->jit_evict_lock);
 	list_add(&gpu_alloc->evict_node, &kctx->evict_list);
 	mutex_unlock(&kctx->jit_evict_lock);
 	kbase_mem_evictable_mark_reclaim(gpu_alloc);
@@ -537,16 +706,18 @@
 
 bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
 {
-	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	struct kbase_context *kctx = gpu_alloc->imported.native.kctx;
 	int err = 0;
 
 	lockdep_assert_held(&kctx->reg_lock);
 
+	mutex_lock(&kctx->jit_evict_lock);
 	/*
 	 * First remove the allocation from the eviction list as it's no
 	 * longer eligible for eviction.
 	 */
 	list_del_init(&gpu_alloc->evict_node);
+	mutex_unlock(&kctx->jit_evict_lock);
 
 	if (gpu_alloc->evicted == 0) {
 		/*
@@ -646,10 +817,15 @@
 	}
 
 	/* limit to imported memory */
-	if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) &&
-	     (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)
 		goto out_unlock;
 
+	/* shareability flags are ignored for GPU uncached memory */
+	if (!(reg->flags & KBASE_REG_GPU_CACHED)) {
+		ret = 0;
+		goto out_unlock;
+	}
+
 	/* no change? */
 	if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
 		ret = 0;
@@ -662,28 +838,17 @@
 	reg->flags |= real_flags;
 
 	/* Currently supporting only imported memory */
-	switch (reg->gpu_alloc->type) {
-#ifdef CONFIG_UMP
-	case KBASE_MEM_TYPE_IMPORTED_UMP:
-		ret = kbase_mmu_update_pages(kctx, reg->start_pfn,
-					     kbase_get_gpu_phy_pages(reg),
-				             reg->gpu_alloc->nents, reg->flags);
-		break;
-#endif
 #ifdef CONFIG_DMA_SHARED_BUFFER
-	case KBASE_MEM_TYPE_IMPORTED_UMM:
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
 		/* Future use will use the new flags, existing mapping will NOT be updated
 		 * as memory should not be in use by the GPU when updating the flags.
 		 */
 		ret = 0;
 		WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
-		break;
-#endif
-	default:
-		break;
 	}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
 
-	/* roll back on error, i.e. not UMP */
+	/* roll back on error */
 	if (ret)
 		reg->flags = prev_flags;
 
@@ -696,128 +861,6 @@
 
 #define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
 
-#ifdef CONFIG_UMP
-static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags)
-{
-	struct kbase_va_region *reg;
-	ump_dd_handle umph;
-	u64 block_count;
-	const ump_dd_physical_block_64 *block_array;
-	u64 i, j;
-	int page = 0;
-	ump_alloc_flags ump_flags;
-	ump_alloc_flags cpu_flags;
-	ump_alloc_flags gpu_flags;
-
-	if (*flags & BASE_MEM_SECURE)
-		goto bad_flags;
-
-	umph = ump_dd_from_secure_id(id);
-	if (UMP_DD_INVALID_MEMORY_HANDLE == umph)
-		goto bad_id;
-
-	ump_flags = ump_dd_allocation_flags_get(umph);
-	cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK;
-	gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) &
-			UMP_DEVICE_MASK;
-
-	*va_pages = ump_dd_size_get_64(umph);
-	*va_pages >>= PAGE_SHIFT;
-
-	if (!*va_pages)
-		goto bad_size;
-
-	if (*va_pages > (U64_MAX / PAGE_SIZE))
-		/* 64-bit address range is the max */
-		goto bad_size;
-
-	if (*flags & BASE_MEM_SAME_VA)
-		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
-	else
-		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
-
-	if (!reg)
-		goto no_region;
-
-	/* we've got pages to map now, and support SAME_VA */
-	*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
-
-	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP);
-	if (IS_ERR_OR_NULL(reg->gpu_alloc))
-		goto no_alloc_obj;
-
-	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
-
-	reg->gpu_alloc->imported.ump_handle = umph;
-
-	reg->flags &= ~KBASE_REG_FREE;
-	reg->flags |= KBASE_REG_GPU_NX;	/* UMP is always No eXecute */
-	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMP cannot be grown */
-
-	/* Override import flags based on UMP flags */
-	*flags &= ~(BASE_MEM_CACHED_CPU);
-	*flags &= ~(BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR);
-	*flags &= ~(BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR);
-
-	if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
-	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) {
-		reg->flags |= KBASE_REG_CPU_CACHED;
-		*flags |= BASE_MEM_CACHED_CPU;
-	}
-
-	if (cpu_flags & UMP_PROT_CPU_WR) {
-		reg->flags |= KBASE_REG_CPU_WR;
-		*flags |= BASE_MEM_PROT_CPU_WR;
-	}
-
-	if (cpu_flags & UMP_PROT_CPU_RD) {
-		reg->flags |= KBASE_REG_CPU_RD;
-		*flags |= BASE_MEM_PROT_CPU_RD;
-	}
-
-	if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
-	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR))
-		reg->flags |= KBASE_REG_GPU_CACHED;
-
-	if (gpu_flags & UMP_PROT_DEVICE_WR) {
-		reg->flags |= KBASE_REG_GPU_WR;
-		*flags |= BASE_MEM_PROT_GPU_WR;
-	}
-
-	if (gpu_flags & UMP_PROT_DEVICE_RD) {
-		reg->flags |= KBASE_REG_GPU_RD;
-		*flags |= BASE_MEM_PROT_GPU_RD;
-	}
-
-	/* ump phys block query */
-	ump_dd_phys_blocks_get_64(umph, &block_count, &block_array);
-
-	for (i = 0; i < block_count; i++) {
-		for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) {
-			struct tagged_addr tagged;
-
-			tagged = as_tagged(block_array[i].addr +
-					   (j << PAGE_SHIFT));
-			reg->gpu_alloc->pages[page] = tagged;
-			page++;
-		}
-	}
-	reg->gpu_alloc->nents = *va_pages;
-	reg->extent = 0;
-
-	return reg;
-
-no_alloc_obj:
-	kfree(reg);
-no_region:
-bad_size:
-	ump_dd_release(umph);
-bad_id:
-bad_flags:
-	return NULL;
-}
-#endif				/* CONFIG_UMP */
-
 #ifdef CONFIG_DMA_SHARED_BUFFER
 static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
 		int fd, u64 *va_pages, u64 *flags, u32 padding)
@@ -861,15 +904,18 @@
 
 	if (shared_zone) {
 		*flags |= BASE_MEM_NEED_MMAP;
-		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+		reg = kbase_alloc_free_region(&kctx->reg_rbtree_same,
+				0, *va_pages, KBASE_REG_ZONE_SAME_VA);
 	} else {
-		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+		reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
+				0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
 	}
 
 	if (!reg)
 		goto no_region;
 
-	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM);
+	reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages,
+			KBASE_MEM_TYPE_IMPORTED_UMM);
 	if (IS_ERR_OR_NULL(reg->gpu_alloc))
 		goto no_alloc_obj;
 
@@ -884,7 +930,6 @@
 	reg->flags &= ~KBASE_REG_FREE;
 	reg->flags |= KBASE_REG_GPU_NX;	/* UMM is always No eXecute */
 	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMM cannot be grown */
-	reg->flags |= KBASE_REG_GPU_CACHED;
 
 	if (*flags & BASE_MEM_SECURE)
 		reg->flags |= KBASE_REG_SECURE;
@@ -903,6 +948,7 @@
 
 invalid_flags:
 	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
 no_alloc_obj:
 	kfree(reg);
 no_region:
@@ -932,6 +978,7 @@
 {
 	long i;
 	struct kbase_va_region *reg;
+	struct rb_root *rbtree;
 	long faulted_pages;
 	int zone = KBASE_REG_ZONE_CUSTOM_VA;
 	bool shared_zone = false;
@@ -941,6 +988,12 @@
 
 	if ((address & (cache_line_alignment - 1)) != 0 ||
 			(size & (cache_line_alignment - 1)) != 0) {
+		if (*flags & BASE_MEM_UNCACHED_GPU) {
+			dev_warn(kctx->kbdev->dev,
+					"User buffer is not cache line aligned and marked as GPU uncached\n");
+			goto bad_size;
+		}
+
 		/* Coherency must be enabled to handle partial cache lines */
 		if (*flags & (BASE_MEM_COHERENT_SYSTEM |
 			BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
@@ -983,14 +1036,16 @@
 	if (shared_zone) {
 		*flags |= BASE_MEM_NEED_MMAP;
 		zone = KBASE_REG_ZONE_SAME_VA;
-	}
+		rbtree = &kctx->reg_rbtree_same;
+	} else {
+		rbtree = &kctx->reg_rbtree_custom;
+	}
 
-	reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone);
+	reg = kbase_alloc_free_region(rbtree, 0, *va_pages, zone);
 
 	if (!reg)
 		goto no_region;
 
-	reg->gpu_alloc = kbase_alloc_create(*va_pages,
+	reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages,
 			KBASE_MEM_TYPE_IMPORTED_USER_BUF);
 	if (IS_ERR_OR_NULL(reg->gpu_alloc))
 		goto no_alloc_obj;
@@ -1010,6 +1065,11 @@
 	user_buf->address = address;
 	user_buf->nr_pages = *va_pages;
 	user_buf->mm = current->mm;
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+	atomic_inc(&current->mm->mm_count);
+#else
+	mmgrab(current->mm);
+#endif
 	user_buf->pages = kmalloc_array(*va_pages, sizeof(struct page *),
 			GFP_KERNEL);
 
@@ -1047,8 +1107,6 @@
 	if (faulted_pages != *va_pages)
 		goto fault_mismatch;
 
-	atomic_inc(&current->mm->mm_count);
-
 	reg->gpu_alloc->nents = 0;
 	reg->extent = 0;
 
@@ -1095,7 +1153,6 @@
 		for (i = 0; i < faulted_pages; i++)
 			put_page(pages[i]);
 	}
-	kfree(user_buf->pages);
 no_page_array:
 invalid_flags:
 	kbase_mem_phy_alloc_put(reg->cpu_alloc);
@@ -1155,21 +1212,23 @@
 		/* 64-bit tasks must MMAP anyway, but not expose this address to
 		 * clients */
 		*flags |= BASE_MEM_NEED_MMAP;
-		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
-					      KBASE_REG_ZONE_SAME_VA);
+		reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0,
+				*num_pages,
+				KBASE_REG_ZONE_SAME_VA);
 	} else {
 #else
 	if (1) {
 #endif
-		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
-					      KBASE_REG_ZONE_CUSTOM_VA);
+		reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
+				0, *num_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
 	}
 
 	if (!reg)
 		goto no_reg;
 
 	/* zero-sized page array, as we don't need one/can support one */
-	reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS);
+	reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS);
 	if (IS_ERR_OR_NULL(reg->gpu_alloc))
 		goto no_alloc_obj;
 
@@ -1213,6 +1272,8 @@
 				goto bad_handle; /* Free region */
 			if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
 				goto bad_handle; /* Ephemeral region */
+			if (!(aliasing_reg->flags & KBASE_REG_GPU_CACHED))
+				goto bad_handle; /* GPU uncached memory */
 			if (!aliasing_reg->gpu_alloc)
 				goto bad_handle; /* No alloc */
 			if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
@@ -1339,17 +1400,6 @@
 	}
 
 	switch (type) {
-#ifdef CONFIG_UMP
-	case BASE_MEM_IMPORT_TYPE_UMP: {
-		ump_secure_id id;
-
-		if (get_user(id, (ump_secure_id __user *)phandle))
-			reg = NULL;
-		else
-			reg = kbase_mem_from_ump(kctx, id, va_pages, flags);
-	}
-	break;
-#endif /* CONFIG_UMP */
 #ifdef CONFIG_DMA_SHARED_BUFFER
 	case BASE_MEM_IMPORT_TYPE_UMM: {
 		int fd;
@@ -1456,13 +1506,13 @@
 
 	/* Map the new pages into the GPU */
 	phy_pages = kbase_get_gpu_phy_pages(reg);
-	ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
-			phy_pages + old_pages, delta, reg->flags);
+	ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages,
+			phy_pages + old_pages, delta, reg->flags, kctx->as_nr);
 
 	return ret;
 }
 
-static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
 		struct kbase_va_region *reg,
 		u64 new_pages, u64 old_pages)
 {
@@ -1477,15 +1527,15 @@
 			(old_pages - new_pages)<<PAGE_SHIFT, 1);
 }
 
-static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
 		struct kbase_va_region *reg,
 		u64 new_pages, u64 old_pages)
 {
 	u64 delta = old_pages - new_pages;
 	int ret = 0;
 
-	ret = kbase_mmu_teardown_pages(kctx,
-			reg->start_pfn + new_pages, delta);
+	ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+			reg->start_pfn + new_pages, delta, kctx->as_nr);
 
 	return ret;
 }
@@ -1717,7 +1767,13 @@
 	.fault = kbase_cpu_vm_fault
 };
 
-static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close)
+static int kbase_cpu_mmap(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		struct vm_area_struct *vma,
+		void *kaddr,
+		size_t nr_pages,
+		unsigned long aligned_offset,
+		int free_on_close)
 {
 	struct kbase_cpu_mapping *map;
 	struct tagged_addr *page_array;
@@ -1799,7 +1855,7 @@
 
 	map->region = reg;
 	map->free_on_close = free_on_close;
-	map->kctx = reg->kctx;
+	map->kctx = kctx;
 	map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
 	map->count = 1; /* start with one ref */
 
@@ -1812,91 +1868,6 @@
 	return err;
 }
 
-static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr)
-{
-	struct kbase_va_region *new_reg;
-	u32 nr_pages;
-	size_t size;
-	int err = 0;
-	u32 *tb;
-	int owns_tb = 1;
-
-	dev_dbg(kctx->kbdev->dev, "in %s\n", __func__);
-	size = (vma->vm_end - vma->vm_start);
-	nr_pages = size >> PAGE_SHIFT;
-
-	if (!kctx->jctx.tb) {
-		KBASE_DEBUG_ASSERT(0 != size);
-		tb = vmalloc_user(size);
-
-		if (NULL == tb) {
-			err = -ENOMEM;
-			goto out;
-		}
-
-		err = kbase_device_trace_buffer_install(kctx, tb, size);
-		if (err) {
-			vfree(tb);
-			goto out;
-		}
-	} else {
-		err = -EINVAL;
-		goto out;
-	}
-
-	*kaddr = kctx->jctx.tb;
-
-	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
-	if (!new_reg) {
-		err = -ENOMEM;
-		WARN_ON(1);
-		goto out_no_region;
-	}
-
-	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB);
-	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
-		err = -ENOMEM;
-		new_reg->cpu_alloc = NULL;
-		WARN_ON(1);
-		goto out_no_alloc;
-	}
-
-	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
-
-	new_reg->cpu_alloc->imported.kctx = kctx;
-	new_reg->flags &= ~KBASE_REG_FREE;
-	new_reg->flags |= KBASE_REG_CPU_CACHED;
-
-	/* alloc now owns the tb */
-	owns_tb = 0;
-
-	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
-		err = -ENOMEM;
-		WARN_ON(1);
-		goto out_no_va_region;
-	}
-
-	*reg = new_reg;
-
-	/* map read only, noexec */
-	vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
-	/* the rest of the flags is added by the cpu_mmap handler */
-
-	dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
-	return 0;
-
-out_no_va_region:
-out_no_alloc:
-	kbase_free_alloced_region(new_reg);
-out_no_region:
-	if (owns_tb) {
-		kbase_device_trace_buffer_uninstall(kctx);
-		vfree(tb);
-	}
-out:
-	return err;
-}
-
 static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
 {
 	struct kbase_va_region *new_reg;
@@ -1916,14 +1887,15 @@
 		goto out;
 	}
 
-	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	new_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, nr_pages,
+			KBASE_REG_ZONE_SAME_VA);
 	if (!new_reg) {
 		err = -ENOMEM;
 		WARN_ON(1);
 		goto out;
 	}
 
-	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW);
+	new_reg->cpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW);
 	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
 		err = -ENOMEM;
 		new_reg->cpu_alloc = NULL;
@@ -2090,14 +2062,6 @@
 		/* Illegal handle for direct map */
 		err = -EINVAL;
 		goto out_unlock;
-	case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE):
-		err = kbase_trace_buffer_mmap(kctx, vma, &reg, &kaddr);
-		if (0 != err)
-			goto out_unlock;
-		dev_dbg(dev, "kbase_trace_buffer_mmap ok\n");
-		/* free the region on munmap */
-		free_on_close = 1;
-		break;
 	case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
 		/* MMU dump */
 		err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
@@ -2169,7 +2133,8 @@
 	} /* default */
 	} /* switch */
 
-	err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close);
+	err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset,
+			free_on_close);
 
 	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
 		/* MMU dump - userspace should now have a reference on
@@ -2188,11 +2153,11 @@
 
 KBASE_EXPORT_TEST_API(kbase_mmap);
 
-static void kbasep_sync_mem_regions(struct kbase_context *kctx,
+void kbase_sync_mem_regions(struct kbase_context *kctx,
 		struct kbase_vmap_struct *map, enum kbase_sync_type dest)
 {
 	size_t i;
-	off_t const offset = (uintptr_t)map->gpu_addr & ~PAGE_MASK;
+	off_t const offset = map->offset_in_page;
 	size_t const page_count = PFN_UP(offset + map->size);
 
 	/* Sync first page */
@@ -2218,66 +2183,55 @@
 	}
 }
 
-void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
-		      unsigned long prot_request, struct kbase_vmap_struct *map)
+static int kbase_vmap_phy_pages(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 offset_bytes, size_t size,
+		struct kbase_vmap_struct *map)
 {
-	struct kbase_va_region *reg;
 	unsigned long page_index;
-	unsigned int offset = gpu_addr & ~PAGE_MASK;
-	size_t page_count = PFN_UP(offset + size);
+	unsigned int offset_in_page = offset_bytes & ~PAGE_MASK;
+	size_t page_count = PFN_UP(offset_in_page + size);
 	struct tagged_addr *page_array;
 	struct page **pages;
 	void *cpu_addr = NULL;
 	pgprot_t prot;
 	size_t i;
 
-	if (!size || !map)
-		return NULL;
+	if (!size || !map || !reg->cpu_alloc || !reg->gpu_alloc)
+		return -EINVAL;
 
 	/* check if page_count calculation will wrap */
 	if (size > ((size_t)-1 / PAGE_SIZE))
-		return NULL;
+		return -EINVAL;
 
-	kbase_gpu_vm_lock(kctx);
-
-	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
-	if (!reg || (reg->flags & KBASE_REG_FREE))
-		goto out_unlock;
-
-	page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn;
+	page_index = offset_bytes >> PAGE_SHIFT;
 
 	/* check if page_index + page_count will wrap */
 	if (-1UL - page_count < page_index)
-		goto out_unlock;
+		return -EINVAL;
 
 	if (page_index + page_count > kbase_reg_current_backed_size(reg))
-		goto out_unlock;
+		return -ENOMEM;
 
 	if (reg->flags & KBASE_REG_DONT_NEED)
-		goto out_unlock;
-
-	/* check access permissions can be satisfied
-	 * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */
-	if ((reg->flags & prot_request) != prot_request)
-		goto out_unlock;
-
-	page_array = kbase_get_cpu_phy_pages(reg);
-	if (!page_array)
-		goto out_unlock;
-
-	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
-	if (!pages)
-		goto out_unlock;
-
-	for (i = 0; i < page_count; i++)
-		pages[i] = phys_to_page(as_phys_addr_t(page_array[page_index +
-								  i]));
+		return -EINVAL;
 
 	prot = PAGE_KERNEL;
 	if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
 		/* Map uncached */
 		prot = pgprot_writecombine(prot);
 	}
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+	if (!page_array)
+		return -ENOMEM;
+
+	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	for (i = 0; i < page_count; i++)
+		pages[i] = as_page(page_array[page_index + i]);
+
 	/* Note: enforcing a RO prot_request onto prot is not done, since:
 	 * - CPU-arch-specific integration required
 	 * - kbase_vmap() requires no access checks to be made/enforced */
@@ -2287,26 +2241,66 @@
 	kfree(pages);
 
 	if (!cpu_addr)
-		goto out_unlock;
+		return -ENOMEM;
 
-	map->gpu_addr = gpu_addr;
-	map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->offset_in_page = offset_in_page;
+	map->cpu_alloc = reg->cpu_alloc;
 	map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index];
-	map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+	map->gpu_alloc = reg->gpu_alloc;
 	map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
-	map->addr = (void *)((uintptr_t)cpu_addr + offset);
+	map->addr = (void *)((uintptr_t)cpu_addr + offset_in_page);
 	map->size = size;
 	map->sync_needed = ((reg->flags & KBASE_REG_CPU_CACHED) != 0) &&
 		!kbase_mem_is_imported(map->gpu_alloc->type);
 
 	if (map->sync_needed)
-		kbasep_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU);
-	kbase_gpu_vm_unlock(kctx);
+		kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU);
 
-	return map->addr;
+	return 0;
+}
+
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map)
+{
+	struct kbase_va_region *reg;
+	void *addr = NULL;
+	u64 offset_bytes;
+	struct kbase_mem_phy_alloc *cpu_alloc;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+	int err;
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	/* check access permissions can be satisfied
+	 * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR}
+	 */
+	if ((reg->flags & prot_request) != prot_request)
+		goto out_unlock;
+
+	offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT);
+	cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map);
+	if (err < 0)
+		goto fail_vmap_phy_pages;
+
+	addr = map->addr;
 
 out_unlock:
 	kbase_gpu_vm_unlock(kctx);
+	return addr;
+
+fail_vmap_phy_pages:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_mem_phy_alloc_put(cpu_alloc);
+	kbase_mem_phy_alloc_put(gpu_alloc);
+
 	return NULL;
 }
 
@@ -2322,22 +2316,29 @@
 }
 KBASE_EXPORT_TEST_API(kbase_vmap);
 
-void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+static void kbase_vunmap_phy_pages(struct kbase_context *kctx,
+		struct kbase_vmap_struct *map)
 {
 	void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
 	vunmap(addr);
 
 	if (map->sync_needed)
-		kbasep_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE);
-	map->gpu_addr = 0;
-	map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
-	map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+		kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE);
+
+	map->offset_in_page = 0;
 	map->cpu_pages = NULL;
 	map->gpu_pages = NULL;
 	map->addr = NULL;
 	map->size = 0;
 	map->sync_needed = false;
 }
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+	kbase_vunmap_phy_pages(kctx, map);
+	map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
+	map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+}
 KBASE_EXPORT_TEST_API(kbase_vunmap);
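After this split, kbase_vmap_prot()/kbase_vmap() take the allocation refcounts and create the kernel mapping, while kbase_vunmap() syncs back to the device if needed and drops those refcounts. A minimal sketch using the kbase_vmap_prot() signature visible in this hunk; gpu_addr is illustrative:

	struct kbase_vmap_struct map;
	u32 *cpu_ptr;

	cpu_ptr = kbase_vmap_prot(kctx, gpu_addr, sizeof(*cpu_ptr),
			KBASE_REG_CPU_WR, &map);
	if (cpu_ptr) {
		*cpu_ptr = 0;			/* CPU-side write */
		kbase_vunmap(kctx, &map);	/* sync back + drop refs */
	}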
 
 void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
@@ -2467,7 +2468,8 @@
 	handle->size   = size;
 
 
-	reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA);
+	reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, pages,
+			KBASE_REG_ZONE_SAME_VA);
 	if (!reg)
 		goto no_reg;
 
@@ -2475,7 +2477,7 @@
 	if (kbase_update_region_flags(kctx, reg, flags) != 0)
 		goto invalid_flags;
 
-	reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW);
+	reg->cpu_alloc = kbase_alloc_create(kctx, pages, KBASE_MEM_TYPE_RAW);
 	if (IS_ERR_OR_NULL(reg->cpu_alloc))
 		goto no_alloc;
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
index 301fdc3..a8a52a7 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010, 2012-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010, 2012-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -37,14 +37,84 @@
 	size_t      size;
 };
 
+/**
+ * kbase_mem_alloc - Create a new allocation for GPU
+ *
+ * @kctx:         The kernel context
+ * @va_pages:     The number of pages of virtual address space to reserve
+ * @commit_pages: The number of physical pages to allocate upfront
+ * @extent:       The number of extra pages to allocate on each GPU fault which
+ *                grows the region.
+ * @flags:        bitmask of BASE_MEM_* flags to convey special requirements &
+ *                properties for the new allocation.
+ * @gpu_va:       Start address of the memory region which was allocated from GPU
+ *                virtual address space.
+ *
+ * Return: Pointer to the new VA region on success, NULL on failure.
+ */
 struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
 		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
 		u64 *gpu_va);
-int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages);
+
+/**
+ * kbase_mem_query - Query properties of a GPU memory region
+ *
+ * @kctx:     The kernel context
+ * @gpu_addr: A GPU address contained within the memory region
+ * @query:    The type of query, from KBASE_MEM_QUERY_* flags: the amount of
+ *            backing physical memory allocated so far for the region, the
+ *            size of the region, or the flags associated with the region.
+ * @out:      Pointer to the location to store the result of query.
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query,
+		u64 *const out);
+
+/**
+ * kbase_mem_import - Import the external memory for use by the GPU
+ *
+ * @kctx:     The kernel context
+ * @type:     Type of external memory
+ * @phandle:  Handle to the external memory interpreted as per the type.
+ * @padding:  Amount of extra VA pages to append to the imported buffer
+ * @gpu_va:   GPU address assigned to the imported external memory
+ * @va_pages: Size of the memory region reserved from the GPU address space
+ * @flags:    bitmask of BASE_MEM_* flags to convey special requirements &
+ *            properties for the new allocation representing the external
+ *            memory.
+ *
+ * Return: 0 on success or error code
+ */
 int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
 		void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages,
 		u64 *flags);
+
+/**
+ * kbase_mem_alias - Create a new allocation for GPU, aliasing one or more
+ *                   memory regions
+ *
+ * @kctx:      The kernel context
+ * @flags:     bitmask of BASE_MEM_* flags.
+ * @stride:    Bytes between start of each memory region
+ * @nents:     The number of regions to pack together into the alias
+ * @ai:        Pointer to the struct containing the memory aliasing info
+ * @num_pages: Number of pages the alias will cover
+ *
+ * Return: 0 on failure, otherwise the GPU VA of the alias
+ */
 u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+
+/**
+ * kbase_mem_flags_change - Change the flags for a memory region
+ *
+ * @kctx:     The kernel context
+ * @gpu_addr: A GPU address contained within the memory region to modify.
+ * @flags:    The new flags to set
+ * @mask:     Mask of the flags, from BASE_MEM_*, to modify.
+ *
+ * Return: 0 on success or error code
+ */
 int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
 
 /**
@@ -58,10 +128,19 @@
  */
 int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages);
 
+/**
+ * kbase_mmap - mmap handler, invoked when the mmap system call is issued on
+ *              the device file /dev/malixx.
+ * @file: Pointer to the device file /dev/malixx instance.
+ * @vma:  Pointer to the struct describing where the GPU allocation will be
+ *        mapped in the CPU virtual address space.
+ *
+ * Return: 0 on success or error code
+ */
 int kbase_mmap(struct file *file, struct vm_area_struct *vma);
 
 /**
- * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction
+ * kbase_mem_evictable_init - Initialize the Ephemeral memory eviction
  * mechanism.
  * @kctx: The kbase context to initialize.
  *
@@ -127,7 +206,7 @@
 bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
 
 struct kbase_vmap_struct {
-	u64 gpu_addr;
+	off_t offset_in_page;
 	struct kbase_mem_phy_alloc *cpu_alloc;
 	struct kbase_mem_phy_alloc *gpu_alloc;
 	struct tagged_addr *cpu_pages;
@@ -242,4 +321,127 @@
 
 extern const struct vm_operations_struct kbase_vm_ops;
 
+/**
+ * kbase_sync_mem_regions - Perform the cache maintenance for the kernel mode
+ *                          CPU mapping.
+ * @kctx: Context the CPU mapping belongs to.
+ * @map:  Structure describing the CPU mapping, set up previously by the
+ *        kbase_vmap() call.
+ * @dest: Indicates the type of maintenance required (i.e. flush or invalidate)
+ *
+ * Note: The caller shall ensure that the CPU mapping is not revoked and
+ * remains active whilst the maintenance is in progress.
+ */
+void kbase_sync_mem_regions(struct kbase_context *kctx,
+		struct kbase_vmap_struct *map, enum kbase_sync_type dest);
+
+/**
+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Shrink (or completely remove) all CPU mappings which reference the shrunk
+ * part of the allocation.
+ */
+void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, or a negative error code on failure
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA; only the page tables
+ * will be modified by this function.
+ */
+int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
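Illustrative sketch (not part of the patch): a shrink of the backing typically
touches both mappings. The ordering below, CPU mappings first and then the GPU
page tables, is an assumption about how a commit-shrink path would use these
two helpers, not something this hunk shows.

/* Hedged sketch of a backing shrink from old_pages down to new_pages */
static int example_shrink_backing(struct kbase_context *kctx,
		struct kbase_va_region *reg, u64 new_pages, u64 old_pages)
{
	/* Remove CPU visibility of the shrunk tail first */
	kbase_mem_shrink_cpu_mapping(kctx, reg, new_pages, old_pages);

	/* Then unmap the same range from the GPU page tables */
	return kbase_mem_shrink_gpu_mapping(kctx, reg, new_pages, old_pages);
}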
+
+/**
+ * kbase_phy_alloc_mapping_term - Terminate the kernel side mapping of a
+ *                                physical allocation
+ * @kctx:  The kernel base context associated with the mapping
+ * @alloc: Pointer to the allocation to terminate
+ *
+ * This function will unmap the kernel mapping, and free any structures used to
+ * track it.
+ */
+void kbase_phy_alloc_mapping_term(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_phy_alloc_mapping_get - Get a kernel-side CPU pointer to the permanent
+ *                               mapping of a physical allocation
+ * @kctx:             The kernel base context @gpu_addr will be looked up in
+ * @gpu_addr:         The gpu address to lookup for the kernel-side CPU mapping
+ * @out_kern_mapping: Pointer to storage for a struct kbase_vmap_struct pointer
+ *                    which will be used for a call to
+ *                    kbase_phy_alloc_mapping_put()
+ *
+ * Return: Pointer to a kernel-side accessible location that directly
+ *         corresponds to @gpu_addr, or NULL on failure
+ *
+ * Looks up @gpu_addr to retrieve the CPU pointer that can be used to access
+ * that location kernel-side. Only certain kinds of memory have a permanent
+ * kernel mapping, refer to the internal functions
+ * kbase_reg_needs_kernel_mapping() and kbase_phy_alloc_mapping_init() for more
+ * information.
+ *
+ * If this function succeeds, a CPU access to the returned pointer will access
+ * the actual location represented by @gpu_addr. That is, the return value does
+ * not require any offset added to it to access the location specified in
+ * @gpu_addr.
+ *
+ * The client must take care to either apply any necessary sync operations when
+ * accessing the data, or ensure that the enclosing region is coherent with
+ * the GPU or uncached on the CPU.
+ *
+ * A refcount on the physical allocations backing the region is taken, so that
+ * they do not disappear whilst the client is accessing them. Once the client
+ * has finished accessing the memory, it must be released with a call to
+ * kbase_phy_alloc_mapping_put().
+ *
+ * Whilst this is expected to execute quickly (the mapping was already set up
+ * when the physical allocation was created), the call is not IRQ-safe due to
+ * the region lookup involved.
+ *
+ * A NULL return value may indicate that:
+ * - a user-side process has freed the allocation, and so @gpu_addr is no
+ *   longer valid
+ * - the region containing @gpu_addr does not support a permanent kernel mapping
+ */
+void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, u64 gpu_addr,
+		struct kbase_vmap_struct **out_kern_mapping);
+
+/**
+ * kbase_phy_alloc_mapping_put - Put a reference to the kernel-side mapping of a
+ *                               physical allocation
+ * @kctx:         The kernel base context associated with the mapping
+ * @kern_mapping: Pointer to the struct kbase_vmap_struct obtained from a call
+ *                to kbase_phy_alloc_mapping_get()
+ *
+ * Releases the reference to the allocations backing @kern_mapping that was
+ * obtained through a call to kbase_phy_alloc_mapping_get(). This must be used
+ * when the client no longer needs to access the kernel-side CPU pointer.
+ *
+ * If this was the last reference on the underlying physical allocations, they
+ * will go through the normal allocation free steps, which also include an
+ * unmap of the permanent kernel mapping for those allocations.
+ *
+ * Due to these operations, the function is not IRQ-safe. However it is
+ * expected to execute quickly in the normal case, i.e. when the region holding
+ * the physical allocation is still present.
+ */
+void kbase_phy_alloc_mapping_put(struct kbase_context *kctx,
+		struct kbase_vmap_struct *kern_mapping);
+
 #endif				/* _KBASE_MEM_LINUX_H_ */
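Illustrative sketch (not part of the patch): the expected pairing of the two
declarations above is get, access, put. example_peek_word() is a hypothetical
helper and collapses error handling to returning 0.

/* Hedged sketch: read one 32-bit word from a permanently mapped allocation */
static u32 example_peek_word(struct kbase_context *kctx, u64 gpu_addr)
{
	struct kbase_vmap_struct *mapping;
	u32 *cpu_ptr;
	u32 value;

	cpu_ptr = kbase_phy_alloc_mapping_get(kctx, gpu_addr, &mapping);
	if (!cpu_ptr)
		return 0; /* freed by user space, or no permanent mapping */

	value = *cpu_ptr;

	/* Release the refcount taken by _get() once the access is done */
	kbase_phy_alloc_mapping_put(kctx, mapping);

	return value;
}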
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
index 0c2b70b..7011603 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2014,2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2014,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -53,11 +53,50 @@
 #define HUGE_HEAD    (1u << 1)
 #define FROM_PARTIAL (1u << 2)
 
+/*
+ * Note: if the macro for converting a physical address to a page is not
+ * defined by the kernel itself, it is defined here. This avoids build errors
+ * that are reported for some architectures.
+ */
+#ifndef phys_to_page
+#define phys_to_page(phys)	(pfn_to_page((phys) >> PAGE_SHIFT))
+#endif
+
+/**
+ * as_phys_addr_t - Retrieve the physical address from a tagged address by
+ *                  masking out the lower order 12 bits.
+ * @t: tagged address to be translated.
+ *
+ * Return: physical address corresponding to tagged address.
+ */
 static inline phys_addr_t as_phys_addr_t(struct tagged_addr t)
 {
 	return t.tagged_addr & PAGE_MASK;
 }
 
+/**
+ * as_page - Retrieve the struct page from a tagged address
+ * @t: tagged address to be translated.
+ *
+ * Return: pointer to struct page corresponding to tagged address.
+ */
+static inline struct page *as_page(struct tagged_addr t)
+{
+	return phys_to_page(as_phys_addr_t(t));
+}
+
+/**
+ * as_tagged - Convert a physical address to the tagged address type; as there
+ *             is no tag info present, the lower order 12 bits will be 0
+ * @phys: physical address to be converted to tagged type
+ *
+ * This is used for 4KB physical pages allocated by the driver or for imported
+ * pages, and is needed because the physical pages tracking object stores
+ * references to physical pages using the tagged address type in lieu of the
+ * type generally used for physical addresses.
+ *
+ * Return: address of tagged address type.
+ */
 static inline struct tagged_addr as_tagged(phys_addr_t phys)
 {
 	struct tagged_addr t;
@@ -66,6 +105,16 @@
 	return t;
 }
 
+/**
+ * as_tagged_tag - Form the tagged address by storing the tag or metadata in
+ *                 the lower order 12 bits of the physical address
+ * @phys: physical address to be converted to tagged address
+ * @tag:  tag to be stored along with the physical address.
+ *
+ * The tag info is used while freeing the pages.
+ *
+ * Return: tagged address storing physical address & tag.
+ */
 static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag)
 {
 	struct tagged_addr t;
@@ -74,11 +123,26 @@
 	return t;
 }
 
+/**
+ * is_huge - Check if the physical page is one of the 512 4KB pages of a
+ *           large page which was not split to be used partially
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if the page belongs to a large page, false otherwise
+ */
 static inline bool is_huge(struct tagged_addr t)
 {
 	return t.tagged_addr & HUGE_PAGE;
 }
 
+/**
+ * is_huge_head - Check if the physical page is the first of the 512 4KB pages
+ *                within a large page which was not split to be used partially
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if the page is the first page of a large page, false otherwise
+ */
 static inline bool is_huge_head(struct tagged_addr t)
 {
 	int mask = HUGE_HEAD | HUGE_PAGE;
@@ -86,6 +150,14 @@
 	return mask == (t.tagged_addr & mask);
 }
 
+/**
+ * is_partial - Check if the physical page is one of the 512 pages of a large
+ *              page which was split into 4KB pages to be used partially for
+ *              allocations >= 2 MB in size.
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if the page was taken from a partially used large page,
+ *         false otherwise
+ */
 static inline bool is_partial(struct tagged_addr t)
 {
 	return t.tagged_addr & FROM_PARTIAL;
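Illustrative sketch (not part of the patch): the helpers above round-trip as
shown below for the head 4KB page of an unsplit large page; the checks follow
directly from the HUGE_PAGE/HUGE_HEAD/FROM_PARTIAL definitions in this header.

/* Hedged sketch: tag the head page of a 2MB physical page and query it back */
static void example_tag_roundtrip(struct page *p)
{
	struct tagged_addr head = as_tagged_tag(page_to_phys(p),
						HUGE_HEAD | HUGE_PAGE);

	WARN_ON(!is_huge(head));	/* tagged as part of a large page */
	WARN_ON(!is_huge_head(head));	/* and as its first 4KB page */
	WARN_ON(is_partial(head));	/* the large page was not split */
	WARN_ON(as_page(head) != p);	/* the address itself is preserved */
}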
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
index 574f1d5..0f91be1 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -39,16 +39,6 @@
 #define NOT_DIRTY false
 #define NOT_RECLAIMED false
 
-static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
-{
-	spin_lock(&pool->pool_lock);
-}
-
-static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
-{
-	spin_unlock(&pool->pool_lock);
-}
-
 static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
 {
 	ssize_t max_size = kbase_mem_pool_max_size(pool);
@@ -177,12 +167,6 @@
 	gfp = GFP_HIGHUSER | __GFP_ZERO;
 #endif
 
-	if (current->flags & PF_KTHREAD) {
-		/* Don't trigger OOM killer from kernel threads, e.g. when
-		 * growing memory on GPU page fault */
-		gfp |= __GFP_NORETRY;
-	}
-
 	/* don't warn on higer order failures */
 	if (pool->order)
 		gfp |= __GFP_NOWARN;
@@ -255,12 +239,33 @@
 	struct page *p;
 	size_t i;
 
+	kbase_mem_pool_lock(pool);
+
+	pool->dont_reclaim = true;
 	for (i = 0; i < nr_to_grow; i++) {
-		p = kbase_mem_alloc_page(pool);
-		if (!p)
+		if (pool->dying) {
+			pool->dont_reclaim = false;
+			kbase_mem_pool_shrink_locked(pool, nr_to_grow);
+			kbase_mem_pool_unlock(pool);
+
 			return -ENOMEM;
-		kbase_mem_pool_add(pool, p);
+		}
+		kbase_mem_pool_unlock(pool);
+
+		p = kbase_mem_alloc_page(pool);
+		if (!p) {
+			kbase_mem_pool_lock(pool);
+			pool->dont_reclaim = false;
+			kbase_mem_pool_unlock(pool);
+
+			return -ENOMEM;
+		}
+
+		kbase_mem_pool_lock(pool);
+		kbase_mem_pool_add_locked(pool, p);
 	}
+	pool->dont_reclaim = false;
+	kbase_mem_pool_unlock(pool);
 
 	return 0;
 }
@@ -312,10 +317,19 @@
 		struct shrink_control *sc)
 {
 	struct kbase_mem_pool *pool;
+	size_t pool_size;
 
 	pool = container_of(s, struct kbase_mem_pool, reclaim);
-	pool_dbg(pool, "reclaim count: %zu\n", kbase_mem_pool_size(pool));
-	return kbase_mem_pool_size(pool);
+
+	kbase_mem_pool_lock(pool);
+	if (pool->dont_reclaim && !pool->dying) {
+		kbase_mem_pool_unlock(pool);
+		return 0;
+	}
+	pool_size = kbase_mem_pool_size(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return pool_size;
 }
 
 static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
@@ -326,9 +340,17 @@
 
 	pool = container_of(s, struct kbase_mem_pool, reclaim);
 
+	kbase_mem_pool_lock(pool);
+	if (pool->dont_reclaim && !pool->dying) {
+		kbase_mem_pool_unlock(pool);
+		return 0;
+	}
+
 	pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan);
 
-	freed = kbase_mem_pool_shrink(pool, sc->nr_to_scan);
+	freed = kbase_mem_pool_shrink_locked(pool, sc->nr_to_scan);
+
+	kbase_mem_pool_unlock(pool);
 
 	pool_dbg(pool, "reclaim freed %ld pages\n", freed);
 
@@ -357,6 +379,7 @@
 	pool->order = order;
 	pool->kbdev = kbdev;
 	pool->next_pool = next_pool;
+	pool->dying = false;
 
 	spin_lock_init(&pool->pool_lock);
 	INIT_LIST_HEAD(&pool->page_list);
@@ -381,12 +404,20 @@
 	return 0;
 }
 
+void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool)
+{
+	kbase_mem_pool_lock(pool);
+	pool->dying = true;
+	kbase_mem_pool_unlock(pool);
+}
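Illustrative sketch (not part of the patch): the teardown ordering implied by
the dying flag is to mark the pool first, so that a concurrent
kbase_mem_pool_grow() backs off, and only then terminate it.
example_pool_teardown() is a hypothetical helper.

static void example_pool_teardown(struct kbase_mem_pool *pool)
{
	/* Make in-flight growers bail out with -ENOMEM */
	kbase_mem_pool_mark_dying(pool);

	/* Spill or free whatever is left in the pool */
	kbase_mem_pool_term(pool);
}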
+
 void kbase_mem_pool_term(struct kbase_mem_pool *pool)
 {
 	struct kbase_mem_pool *next_pool = pool->next_pool;
-	struct page *p;
+	struct page *p, *tmp;
 	size_t nr_to_spill = 0;
 	LIST_HEAD(spill_list);
+	LIST_HEAD(free_list);
 	int i;
 
 	pool_dbg(pool, "terminate()\n");
@@ -404,7 +435,6 @@
 		/* Zero pages first without holding the next_pool lock */
 		for (i = 0; i < nr_to_spill; i++) {
 			p = kbase_mem_pool_remove_locked(pool);
-			kbase_mem_pool_zero_page(pool, p);
 			list_add(&p->lru, &spill_list);
 		}
 	}
@@ -412,18 +442,26 @@
 	while (!kbase_mem_pool_is_empty(pool)) {
 		/* Free remaining pages to kernel */
 		p = kbase_mem_pool_remove_locked(pool);
-		kbase_mem_pool_free_page(pool, p);
+		list_add(&p->lru, &free_list);
 	}
 
 	kbase_mem_pool_unlock(pool);
 
 	if (next_pool && nr_to_spill) {
+		list_for_each_entry(p, &spill_list, lru)
+			kbase_mem_pool_zero_page(pool, p);
+
 		/* Add new page list to next_pool */
 		kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill);
 
 		pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill);
 	}
 
+	list_for_each_entry_safe(p, tmp, &free_list, lru) {
+		list_del_init(&p->lru);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
 	pool_dbg(pool, "terminated\n");
 }
 
@@ -444,6 +482,21 @@
 	return NULL;
 }
 
+struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	pool_dbg(pool, "alloc_locked()\n");
+	p = kbase_mem_pool_remove_locked(pool);
+
+	if (p)
+		return p;
+
+	return NULL;
+}
+
 void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
 		bool dirty)
 {
@@ -466,6 +519,25 @@
 	}
 }
 
+void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	pool_dbg(pool, "free_locked()\n");
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add_locked(pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
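Illustrative sketch (not part of the patch): the *_locked variants expect
pool_lock to already be held, so a caller that must not drop the lock between
taking and returning a page keeps the whole sequence in one critical section.
The helper name is hypothetical.

static void example_borrow_page_locked(struct kbase_mem_pool *pool)
{
	struct page *p;

	kbase_mem_pool_lock(pool);

	p = kbase_mem_pool_alloc_locked(pool);
	if (p)
		kbase_mem_pool_free_locked(pool, p, false);

	kbase_mem_pool_unlock(pool);
}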
+
 int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
 		struct tagged_addr *pages, bool partial_allowed)
 {
@@ -543,7 +615,6 @@
 
 done:
 	pool_dbg(pool, "alloc_pages(%zu) done\n", i);
-
 	return i;
 
 err_rollback:
@@ -551,6 +622,49 @@
 	return err;
 }
 
+int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_4k_pages, struct tagged_addr *pages)
+{
+	struct page *p;
+	size_t i;
+	size_t nr_pages_internal;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	nr_pages_internal = nr_4k_pages / (1u << (pool->order));
+
+	if (nr_pages_internal * (1u << pool->order) != nr_4k_pages)
+		return -EINVAL;
+
+	pool_dbg(pool, "alloc_pages_locked(4k=%zu):\n", nr_4k_pages);
+	pool_dbg(pool, "alloc_pages_locked(internal=%zu):\n",
+			nr_pages_internal);
+
+	if (kbase_mem_pool_size(pool) < nr_pages_internal) {
+		pool_dbg(pool, "Failed alloc\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_pages_internal; i++) {
+		int j;
+
+		p = kbase_mem_pool_remove_locked(pool);
+		if (pool->order) {
+			*pages++ = as_tagged_tag(page_to_phys(p),
+						   HUGE_HEAD | HUGE_PAGE);
+			for (j = 1; j < (1u << pool->order); j++) {
+				*pages++ = as_tagged_tag(page_to_phys(p) +
+							   PAGE_SIZE * j,
+							   HUGE_PAGE);
+			}
+		} else {
+			*pages++ = as_tagged(page_to_phys(p));
+		}
+	}
+
+	return nr_4k_pages;
+}
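Illustrative sketch (not part of the patch): kbase_mem_pool_alloc_pages_locked()
only draws from pages already in the pool, so a caller typically sizes the pool
and allocates without dropping the lock in between, mirroring the
check-then-allocate pattern the page fault path in this patch uses. The helper
name is hypothetical.

static int example_alloc_from_pool_locked(struct kbase_mem_pool *pool,
		size_t nr_4k_pages, struct tagged_addr *pages)
{
	int ret;

	kbase_mem_pool_lock(pool);

	if ((kbase_mem_pool_size(pool) << pool->order) < nr_4k_pages)
		ret = -ENOMEM;	/* not enough in the pool; grow it instead */
	else
		ret = kbase_mem_pool_alloc_pages_locked(pool, nr_4k_pages,
				pages);

	kbase_mem_pool_unlock(pool);

	return ret < 0 ? ret : 0;
}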
+
 static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
 				     size_t nr_pages, struct tagged_addr *pages,
 				     bool zero, bool sync)
@@ -572,7 +686,7 @@
 			continue;
 
 		if (is_huge_head(pages[i]) || !is_huge(pages[i])) {
-			p = phys_to_page(as_phys_addr_t(pages[i]));
+			p = as_page(pages[i]);
 			if (zero)
 				kbase_mem_pool_zero_page(pool, p);
 			else if (sync)
@@ -591,6 +705,48 @@
 			nr_pages, nr_to_pool);
 }
 
+static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool *pool,
+		size_t nr_pages, struct tagged_addr *pages,
+		bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array_locked(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge_head(pages[i]) || !is_huge(pages[i])) {
+			p = as_page(pages[i]);
+			if (zero)
+				kbase_mem_pool_zero_page(pool, p);
+			else if (sync)
+				kbase_mem_pool_sync_page(pool, p);
+
+			list_add(&p->lru, &new_page_list);
+			nr_to_pool++;
+		}
+		pages[i] = as_tagged(0);
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list_locked(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array_locked(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
 void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
 		struct tagged_addr *pages, bool dirty, bool reclaimed)
 {
@@ -632,7 +788,7 @@
 			continue;
 		}
 
-		p = phys_to_page(as_phys_addr_t(pages[i]));
+		p = as_page(pages[i]);
 
 		kbase_mem_pool_free_page(pool, p);
 		pages[i] = as_tagged(0);
@@ -640,3 +796,47 @@
 
 	pool_dbg(pool, "free_pages(%zu) done\n", nr_pages);
 }
+
+
+void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_pages, struct tagged_addr *pages, bool dirty,
+		bool reclaimed)
+{
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	pool_dbg(pool, "free_pages_locked(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array_locked(pool, nr_pages, pages, false,
+				dirty);
+
+		i += nr_to_pool;
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge(pages[i]) && !is_huge_head(pages[i])) {
+			pages[i] = as_tagged(0);
+			continue;
+		}
+
+		p = as_page(pages[i]);
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = as_tagged(0);
+	}
+
+	pool_dbg(pool, "free_pages_locked(%zu) done\n", nr_pages);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
index cb968f6..43b0f6c03 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -32,7 +32,8 @@
  * The size of the buffer to accumulate the histogram report text in
  * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
  */
-#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56))
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE \
+	((size_t) (64 + ((80 + (56 * 64)) * 34) + 56))
 
 #endif  /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
index 65b7da03..3ba861d 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mmu.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -72,6 +72,19 @@
 		u64 vpfn, size_t nr, bool sync);
 
 /**
+ * kbase_mmu_flush_invalidate_no_ctx() - Flush and invalidate the GPU caches.
+ * @kbdev: Device pointer.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Whether the operation should be synchronous.
+ * @as_nr: GPU address space number for which flush + invalidate is required.
+ *
+ * This is used for MMU tables which do not belong to a user space context.
+ */
+static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev,
+		u64 vpfn, size_t nr, bool sync, int as_nr);
+
+/**
  * kbase_mmu_sync_pgd - sync page directory to memory
  * @kbdev:	Device pointer.
  * @handle:	Address of DMA region.
@@ -103,6 +116,9 @@
 static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
 		struct kbase_as *as, const char *reason_str);
 
+static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr *phys, size_t nr,
+					unsigned long flags);
 
 /**
  * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to
@@ -120,7 +136,8 @@
  *
  * Return: the number of backed pages to increase by
  */
-static size_t reg_grow_calc_extra_pages(struct kbase_va_region *reg, size_t fault_rel_pfn)
+static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev,
+		struct kbase_va_region *reg, size_t fault_rel_pfn)
 {
 	size_t multiple = reg->extent;
 	size_t reg_current_size = kbase_reg_current_backed_size(reg);
@@ -128,7 +145,7 @@
 	size_t remainder;
 
 	if (!multiple) {
-		dev_warn(reg->kctx->kbdev->dev,
+		dev_warn(kbdev->dev,
 				"VA Region 0x%llx extent was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n",
 				((unsigned long long)reg->start_pfn) << PAGE_SHIFT);
 		return minimum_extra;
@@ -172,21 +189,21 @@
 }
 
 #ifdef CONFIG_MALI_JOB_DUMP
-static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_context *kctx,
+static void kbase_gpu_mmu_handle_write_faulting_as(
 				struct kbase_device *kbdev,
 				struct kbase_as *faulting_as,
 				u64 start_pfn, size_t nr, u32 op)
 {
 	mutex_lock(&kbdev->mmu_hw_mutex);
 
-	kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+	kbase_mmu_hw_clear_fault(kbdev, faulting_as,
 			KBASE_MMU_FAULT_TYPE_PAGE);
-	kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx, start_pfn,
+	kbase_mmu_hw_do_operation(kbdev, faulting_as, start_pfn,
 			nr, op, 1);
 
 	mutex_unlock(&kbdev->mmu_hw_mutex);
 
-	kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+	kbase_mmu_hw_enable_fault(kbdev, faulting_as,
 			KBASE_MMU_FAULT_TYPE_PAGE);
 }
 
@@ -224,23 +241,15 @@
 		return;
 	}
 
-	/* Capture handle and offset of the faulting write location
+	/* Capture addresses of faulting write location
 	 * for job dumping if write tracking is enabled.
 	 */
 	if (kctx->gwt_enabled) {
 		u64 page_addr = faulting_as->fault_addr & PAGE_MASK;
-		u64 offset = (page_addr >> PAGE_SHIFT) - region->start_pfn;
-		u64 handle = region->start_pfn << PAGE_SHIFT;
 		bool found = false;
-
-		if (KBASE_MEM_TYPE_IMPORTED_UMM == region->cpu_alloc->type)
-			handle |= BIT(0);
-
 		/* Check if this write was already handled. */
 		list_for_each_entry(pos, &kctx->gwt_current_list, link) {
-			if (handle == pos->handle &&
-					offset >= pos->offset &&
-					offset < pos->offset + pos->num_pages) {
+			if (page_addr == pos->page_addr) {
 				found = true;
 				break;
 			}
@@ -249,8 +258,8 @@
 		if (!found) {
 			pos = kmalloc(sizeof(*pos), GFP_KERNEL);
 			if (pos) {
-				pos->handle = handle;
-				pos->offset = offset;
+				pos->region = region;
+				pos->page_addr = page_addr;
 				pos->num_pages = 1;
 				list_add(&pos->link, &kctx->gwt_current_list);
 			} else {
@@ -271,7 +280,7 @@
 	else
 		op = AS_COMMAND_FLUSH_PT;
 
-	kbase_gpu_mmu_handle_write_faulting_as(kctx, kbdev, faulting_as,
+	kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as,
 			fault_pfn, 1, op);
 
 	kbase_gpu_vm_unlock(kctx);
@@ -305,6 +314,203 @@
 }
 #endif
 
+#define MAX_POOL_LEVEL 2
+
+/**
+ * page_fault_try_alloc - Try to allocate memory from a context pool
+ * @kctx:          Context pointer
+ * @region:        Region to grow
+ * @new_pages:     Number of 4 kB pages to allocate
+ * @pages_to_grow: Pointer to variable to store number of outstanding pages on
+ *                 failure. This can be either 4 kB or 2 MB pages, depending on
+ *                 the number of pages requested.
+ * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true
+ *                 for 2 MB, false for 4 kB.
+ * @prealloc_sas:  Pointer to kbase_sub_alloc structures
+ *
+ * This function will try to allocate as many pages as possible from the context
+ * pool, then if required will try to allocate the remaining pages from the
+ * device pool.
+ *
+ * This function will not allocate any new memory beyond what is already
+ * present in the context or device pools. This is because it is intended to be
+ * called with the vm_lock held, which could cause recursive locking if the
+ * allocation caused the out-of-memory killer to run.
+ *
+ * If 2 MB pages are enabled and new_pages covers at least 2 MB worth of pages
+ * then pages_to_grow will be a count of 2 MB pages, otherwise it will be a
+ * count of 4 kB pages.
+ *
+ * Return: true if successful, false on failure
+ */
+static bool page_fault_try_alloc(struct kbase_context *kctx,
+		struct kbase_va_region *region, size_t new_pages,
+		int *pages_to_grow, bool *grow_2mb_pool,
+		struct kbase_sub_alloc **prealloc_sas)
+{
+	struct tagged_addr *gpu_pages[MAX_POOL_LEVEL] = {NULL};
+	struct tagged_addr *cpu_pages[MAX_POOL_LEVEL] = {NULL};
+	size_t pages_alloced[MAX_POOL_LEVEL] = {0};
+	struct kbase_mem_pool *pool, *root_pool;
+	int pool_level = 0;
+	bool alloc_failed = false;
+	size_t pages_still_required;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	if (new_pages >= (SZ_2M / SZ_4K)) {
+		root_pool = &kctx->lp_mem_pool;
+		*grow_2mb_pool = true;
+	} else {
+#endif
+		root_pool = &kctx->mem_pool;
+		*grow_2mb_pool = false;
+#ifdef CONFIG_MALI_2MB_ALLOC
+	}
+#endif
+
+	if (region->gpu_alloc != region->cpu_alloc)
+		new_pages *= 2;
+
+	pages_still_required = new_pages;
+
+	/* Determine how many pages are in the pools before trying to allocate.
+	 * Don't attempt to allocate & free if the allocation can't succeed.
+	 */
+	for (pool = root_pool; pool != NULL; pool = pool->next_pool) {
+		size_t pool_size_4k;
+
+		kbase_mem_pool_lock(pool);
+
+		pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
+		if (pool_size_4k >= pages_still_required)
+			pages_still_required = 0;
+		else
+			pages_still_required -= pool_size_4k;
+
+		kbase_mem_pool_unlock(pool);
+
+		if (!pages_still_required)
+			break;
+	}
+
+	if (pages_still_required) {
+		/* Insufficient pages in pools. Don't try to allocate - just
+		 * request a grow.
+		 */
+		*pages_to_grow = pages_still_required;
+
+		return false;
+	}
+
+	/* Since we've dropped the pool locks, the amount of memory in the pools
+	 * may change between the above check and the actual allocation.
+	 */
+	pool = root_pool;
+	for (pool_level = 0; pool_level < MAX_POOL_LEVEL; pool_level++) {
+		size_t pool_size_4k;
+		size_t pages_to_alloc_4k;
+		size_t pages_to_alloc_4k_per_alloc;
+
+		kbase_mem_pool_lock(pool);
+
+		/* Allocate as much as possible from this pool */
+		pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
+		pages_to_alloc_4k = MIN(new_pages, pool_size_4k);
+		if (region->gpu_alloc == region->cpu_alloc)
+			pages_to_alloc_4k_per_alloc = pages_to_alloc_4k;
+		else
+			pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1;
+
+		pages_alloced[pool_level] = pages_to_alloc_4k;
+		if (pages_to_alloc_4k) {
+			gpu_pages[pool_level] =
+					kbase_alloc_phy_pages_helper_locked(
+						region->gpu_alloc, pool,
+						pages_to_alloc_4k_per_alloc,
+						&prealloc_sas[0]);
+
+			if (!gpu_pages[pool_level]) {
+				alloc_failed = true;
+			} else if (region->gpu_alloc != region->cpu_alloc) {
+				cpu_pages[pool_level] =
+					kbase_alloc_phy_pages_helper_locked(
+						region->cpu_alloc, pool,
+						pages_to_alloc_4k_per_alloc,
+						&prealloc_sas[1]);
+
+				if (!cpu_pages[pool_level])
+					alloc_failed = true;
+			}
+		}
+
+		kbase_mem_pool_unlock(pool);
+
+		if (alloc_failed) {
+			WARN_ON(!new_pages);
+			WARN_ON(pages_to_alloc_4k >= new_pages);
+			WARN_ON(pages_to_alloc_4k_per_alloc >= new_pages);
+			break;
+		}
+
+		new_pages -= pages_to_alloc_4k;
+
+		if (!new_pages)
+			break;
+
+		pool = pool->next_pool;
+		if (!pool)
+			break;
+	}
+
+	if (new_pages) {
+		/* Allocation was unsuccessful */
+		int max_pool_level = pool_level;
+
+		pool = root_pool;
+
+		/* Free memory allocated so far */
+		for (pool_level = 0; pool_level <= max_pool_level;
+				pool_level++) {
+			kbase_mem_pool_lock(pool);
+
+			if (region->gpu_alloc != region->cpu_alloc) {
+				if (pages_alloced[pool_level] &&
+						cpu_pages[pool_level])
+					kbase_free_phy_pages_helper_locked(
+						region->cpu_alloc,
+						pool, cpu_pages[pool_level],
+						pages_alloced[pool_level]);
+			}
+
+			if (pages_alloced[pool_level] && gpu_pages[pool_level])
+				kbase_free_phy_pages_helper_locked(
+						region->gpu_alloc,
+						pool, gpu_pages[pool_level],
+						pages_alloced[pool_level]);
+
+			kbase_mem_pool_unlock(pool);
+
+			pool = pool->next_pool;
+		}
+
+		/*
+		 * If the allocation failed despite there being enough memory in
+		 * the pool, then just fail. Otherwise, try to grow the memory
+		 * pool.
+		 */
+		if (alloc_failed)
+			*pages_to_grow = 0;
+		else
+			*pages_to_grow = new_pages;
+
+		return false;
+	}
+
+	/* Allocation was successful. No pages to grow, return success. */
+	*pages_to_grow = 0;
+
+	return true;
+}
+
 void page_fault_worker(struct work_struct *data)
 {
 	u64 fault_pfn;
@@ -318,6 +524,10 @@
 	struct kbase_va_region *region;
 	int err;
 	bool grown = false;
+	int pages_to_grow;
+	bool grow_2mb_pool;
+	struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
+	int i;
 
 	faulting_as = container_of(data, struct kbase_as, work_pagefault);
 	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
@@ -336,11 +546,10 @@
 
 	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
 
-	if (unlikely(faulting_as->protected_mode))
-	{
+	if (unlikely(faulting_as->protected_mode)) {
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
 				"Protected mode fault");
-		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
 				KBASE_MMU_FAULT_TYPE_PAGE);
 
 		goto fault_done;
@@ -403,6 +612,19 @@
 		goto fault_done;
 	}
 
+#ifdef CONFIG_MALI_2MB_ALLOC
+	/* Preallocate memory for the sub-allocation structs if necessary */
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+		prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
+		if (!prealloc_sas[i]) {
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Failed pre-allocating memory for sub-allocations' metadata");
+			goto fault_done;
+		}
+	}
+#endif /* CONFIG_MALI_2MB_ALLOC */
+
+page_fault_retry:
 	/* so we have a translation fault, let's see if it is for growable
 	 * memory */
 	kbase_gpu_vm_lock(kctx);
@@ -451,7 +673,7 @@
 
 		mutex_lock(&kbdev->mmu_hw_mutex);
 
-		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
 				KBASE_MMU_FAULT_TYPE_PAGE);
 		/* [1] in case another page fault occurred while we were
 		 * handling the (duplicate) page fault we need to ensure we
@@ -461,19 +683,19 @@
 		 * transaction (which should cause the other page fault to be
 		 * raised again).
 		 */
-		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0,
 				AS_COMMAND_UNLOCK, 1);
 
 		mutex_unlock(&kbdev->mmu_hw_mutex);
 
-		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
 				KBASE_MMU_FAULT_TYPE_PAGE);
 		kbase_gpu_vm_unlock(kctx);
 
 		goto fault_done;
 	}
 
-	new_pages = reg_grow_calc_extra_pages(region, fault_rel_pfn);
+	new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn);
 
 	/* cap to max vsize */
 	new_pages = min(new_pages, region->nr_pages - kbase_reg_current_backed_size(region));
@@ -482,34 +704,26 @@
 		mutex_lock(&kbdev->mmu_hw_mutex);
 
 		/* Duplicate of a fault we've already handled, nothing to do */
-		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
 				KBASE_MMU_FAULT_TYPE_PAGE);
 		/* See comment [1] about UNLOCK usage */
-		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0,
 				AS_COMMAND_UNLOCK, 1);
 
 		mutex_unlock(&kbdev->mmu_hw_mutex);
 
-		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
 				KBASE_MMU_FAULT_TYPE_PAGE);
 		kbase_gpu_vm_unlock(kctx);
 		goto fault_done;
 	}
 
-	if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) {
-		if (region->gpu_alloc != region->cpu_alloc) {
-			if (kbase_alloc_phy_pages_helper(
-					region->cpu_alloc, new_pages) == 0) {
-				grown = true;
-			} else {
-				kbase_free_phy_pages_helper(region->gpu_alloc,
-						new_pages);
-			}
-		} else {
-			grown = true;
-		}
-	}
+	pages_to_grow = 0;
 
+	spin_lock(&kctx->mem_partials_lock);
+	grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow,
+			&grow_2mb_pool, prealloc_sas);
+	spin_unlock(&kctx->mem_partials_lock);
 
 	if (grown) {
 		u64 pfn_offset;
@@ -528,7 +742,7 @@
 		 * so the no_flush version of insert_pages is used which allows
 		 * us to unlock the MMU as we see fit.
 		 */
-		err = kbase_mmu_insert_pages_no_flush(kctx,
+		err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu,
 				region->start_pfn + pfn_offset,
 				&kbase_get_gpu_phy_pages(region)[pfn_offset],
 				new_pages, region->flags);
@@ -565,10 +779,10 @@
 		 * this stage a new IRQ might not be raised when the GPU finds
 		 * a MMU IRQ is already pending.
 		 */
-		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
 					 KBASE_MMU_FAULT_TYPE_PAGE);
 
-		kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
+		kbase_mmu_hw_do_operation(kbdev, faulting_as,
 					  faulting_as->fault_addr >> PAGE_SHIFT,
 					  new_pages,
 					  op, 1);
@@ -577,7 +791,7 @@
 		/* AS transaction end */
 
 		/* reenable this in the mask */
-		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
 					 KBASE_MMU_FAULT_TYPE_PAGE);
 
 #ifdef CONFIG_MALI_JOB_DUMP
@@ -587,12 +801,13 @@
 
 			pos = kmalloc(sizeof(*pos), GFP_KERNEL);
 			if (pos) {
-				pos->handle = region->start_pfn << PAGE_SHIFT;
-				pos->offset = pfn_offset;
+				pos->region = region;
+				pos->page_addr = (region->start_pfn +
+							pfn_offset) <<
+							 PAGE_SHIFT;
 				pos->num_pages = new_pages;
 				list_add(&pos->link,
 					&kctx->gwt_current_list);
-
 			} else {
 				dev_warn(kbdev->dev, "kmalloc failure");
 			}
@@ -600,13 +815,43 @@
 #endif
 		kbase_gpu_vm_unlock(kctx);
 	} else {
-		/* failed to extend, handle as a normal PF */
+		int ret = -ENOMEM;
+
 		kbase_gpu_vm_unlock(kctx);
-		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Page allocation failure");
+
+		/* If the memory pool was insufficient then grow it and retry.
+		 * Otherwise fail the allocation.
+		 */
+		if (pages_to_grow > 0) {
+#ifdef CONFIG_MALI_2MB_ALLOC
+			if (grow_2mb_pool) {
+				/* Round page requirement up to nearest 2 MB */
+				pages_to_grow = (pages_to_grow +
+					((1 << kctx->lp_mem_pool.order) - 1))
+						>> kctx->lp_mem_pool.order;
+				ret = kbase_mem_pool_grow(&kctx->lp_mem_pool,
+						pages_to_grow);
+			} else {
+#endif
+				ret = kbase_mem_pool_grow(&kctx->mem_pool,
+						pages_to_grow);
+#ifdef CONFIG_MALI_2MB_ALLOC
+			}
+#endif
+		}
+		if (ret < 0) {
+			/* failed to extend, handle as a normal PF */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page allocation failure");
+		} else {
+			goto page_fault_retry;
+		}
 	}
 
 fault_done:
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
+		kfree(prealloc_sas[i]);
+
 	/*
 	 * By this point, the fault was handled in some way,
 	 * so release the ctx refcount
@@ -616,54 +861,57 @@
 	atomic_dec(&kbdev->faults_pending);
 }
 
-phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
+static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut)
 {
 	u64 *page;
 	int i;
 	struct page *p;
-	int new_page_count __maybe_unused;
 
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-	new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages);
-	kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
-
-	p = kbase_mem_pool_alloc(&kctx->mem_pool);
+	p = kbase_mem_pool_alloc(&kbdev->mem_pool);
 	if (!p)
-		goto sub_pages;
-
-	KBASE_TLSTREAM_AUX_PAGESALLOC(
-			kctx->id,
-			(u64)new_page_count);
+		return 0;
 
 	page = kmap(p);
 	if (NULL == page)
 		goto alloc_free;
 
-	kbase_process_page_usage_inc(kctx, 1);
+	/* If the MMU tables belong to a context then account the memory usage
+	 * to that context, otherwise the MMU tables are device wide and are
+	 * only accounted to the device.
+	 */
+	if (mmut->kctx) {
+		int new_page_count;
+
+		new_page_count = kbase_atomic_add_pages(1,
+				&mmut->kctx->used_pages);
+		KBASE_TLSTREAM_AUX_PAGESALLOC(
+				mmut->kctx->id,
+				(u64)new_page_count);
+		kbase_process_page_usage_inc(mmut->kctx, 1);
+	}
+
+	kbase_atomic_add_pages(1, &kbdev->memdev.used_pages);
 
 	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
-		kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
+		kbdev->mmu_mode->entry_invalidate(&page[i]);
 
-	kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+	kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
 
 	kunmap(p);
 	return page_to_phys(p);
 
 alloc_free:
-	kbase_mem_pool_free(&kctx->mem_pool, p, false);
-sub_pages:
-	kbase_atomic_sub_pages(1, &kctx->used_pages);
-	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+	kbase_mem_pool_free(&kbdev->mem_pool, p, false);
 
 	return 0;
 }
 
-KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd);
-
 /* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
  * new table from the pool if needed and possible
  */
-static int mmu_get_next_pgd(struct kbase_context *kctx,
+static int mmu_get_next_pgd(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut,
 		phys_addr_t *pgd, u64 vpfn, int level)
 {
 	u64 *page;
@@ -671,9 +919,8 @@
 	struct page *p;
 
 	KBASE_DEBUG_ASSERT(*pgd);
-	KBASE_DEBUG_ASSERT(NULL != kctx);
 
-	lockdep_assert_held(&kctx->mmu_lock);
+	lockdep_assert_held(&mmut->mmu_lock);
 
 	/*
 	 * Architecture spec defines level-0 as being the top-most.
@@ -685,23 +932,24 @@
 	p = pfn_to_page(PFN_DOWN(*pgd));
 	page = kmap(p);
 	if (NULL == page) {
-		dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
+		dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
 		return -EINVAL;
 	}
 
-	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+	target_pgd = kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
 
 	if (!target_pgd) {
-		target_pgd = kbase_mmu_alloc_pgd(kctx);
+		target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
 		if (!target_pgd) {
-			dev_dbg(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
+			dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n",
+					__func__);
 			kunmap(p);
 			return -ENOMEM;
 		}
 
-		kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+		kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
 
-		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+		kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
 		/* Rely on the caller to update the address space flags. */
 	}
 
@@ -714,7 +962,8 @@
 /*
  * Returns the PGD for the specified level of translation
  */
-static int mmu_get_pgd_at_level(struct kbase_context *kctx,
+static int mmu_get_pgd_at_level(struct kbase_device *kbdev,
+					struct kbase_mmu_table *mmut,
 					u64 vpfn,
 					unsigned int level,
 					phys_addr_t *out_pgd)
@@ -722,14 +971,14 @@
 	phys_addr_t pgd;
 	int l;
 
-	lockdep_assert_held(&kctx->mmu_lock);
-	pgd = kctx->pgd;
+	lockdep_assert_held(&mmut->mmu_lock);
+	pgd = mmut->pgd;
 
 	for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) {
-		int err = mmu_get_next_pgd(kctx, &pgd, vpfn, l);
+		int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
 		/* Handle failure condition */
 		if (err) {
-			dev_dbg(kctx->kbdev->dev,
+			dev_dbg(kbdev->dev,
 				 "%s: mmu_get_next_pgd failure at level %d\n",
 				 __func__, l);
 			return err;
@@ -741,27 +990,30 @@
 	return 0;
 }
 
-#define mmu_get_bottom_pgd(kctx, vpfn, out_pgd) \
-	mmu_get_pgd_at_level((kctx), (vpfn), MIDGARD_MMU_BOTTOMLEVEL, (out_pgd))
+static int mmu_get_bottom_pgd(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut,
+		u64 vpfn,
+		phys_addr_t *out_pgd)
+{
+	return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL,
+			out_pgd);
+}
 
-
-static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx,
-					      u64 from_vpfn, u64 to_vpfn)
+static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut,
+		u64 from_vpfn, u64 to_vpfn)
 {
 	phys_addr_t pgd;
 	u64 vpfn = from_vpfn;
 	struct kbase_mmu_mode const *mmu_mode;
 
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-	KBASE_DEBUG_ASSERT(0 != vpfn);
 	/* 64-bit address range is the max */
 	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
 	KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn);
 
-	lockdep_assert_held(&kctx->mmu_lock);
-	lockdep_assert_held(&kctx->reg_lock);
+	lockdep_assert_held(&mmut->mmu_lock);
 
-	mmu_mode = kctx->kbdev->mmu_mode;
+	mmu_mode = kbdev->mmu_mode;
 
 	while (vpfn < to_vpfn) {
 		unsigned int i;
@@ -776,7 +1028,7 @@
 			count = left;
 
 		/* need to check if this is a 2MB page or a 4kB */
-		pgd = kctx->pgd;
+		pgd = mmut->pgd;
 
 		for (level = MIDGARD_MMU_TOPLEVEL;
 				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
@@ -798,7 +1050,7 @@
 			pcount = count;
 			break;
 		default:
-			dev_warn(kctx->kbdev->dev, "%sNo support for ATEs at level %d\n",
+			dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n",
 			       __func__, level);
 			goto next;
 		}
@@ -807,7 +1059,7 @@
 		for (i = 0; i < pcount; i++)
 			mmu_mode->entry_invalidate(&page[idx + i]);
 
-		kbase_mmu_sync_pgd(kctx->kbdev,
+		kbase_mmu_sync_pgd(kbdev,
 				   kbase_dma_addr(phys_to_page(pgd)) + 8 * idx,
 				   8 * pcount);
 		kunmap(phys_to_page(pgd));
@@ -836,7 +1088,6 @@
 	struct kbase_mmu_mode const *mmu_mode;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
-	KBASE_DEBUG_ASSERT(0 != vpfn);
 	/* 64-bit address range is the max */
 	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
 
@@ -846,7 +1097,7 @@
 	if (nr == 0)
 		return 0;
 
-	mutex_lock(&kctx->mmu_lock);
+	mutex_lock(&kctx->mmu.mmu_lock);
 
 	while (remain) {
 		unsigned int i;
@@ -865,27 +1116,27 @@
 		 * 256 pages at once (on average). Do we really care?
 		 */
 		do {
-			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			err = mmu_get_bottom_pgd(kctx->kbdev, &kctx->mmu,
+					vpfn, &pgd);
 			if (err != -ENOMEM)
 				break;
 			/* Fill the memory pool with enough pages for
 			 * the page walk to succeed
 			 */
-			mutex_unlock(&kctx->mmu_lock);
-			err = kbase_mem_pool_grow(&kctx->mem_pool,
+			mutex_unlock(&kctx->mmu.mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->kbdev->mem_pool,
 					MIDGARD_MMU_BOTTOMLEVEL);
-			mutex_lock(&kctx->mmu_lock);
+			mutex_lock(&kctx->mmu.mmu_lock);
 		} while (!err);
 		if (err) {
 			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
 			if (recover_required) {
 				/* Invalidate the pages we have partially
 				 * completed */
-				mmu_insert_pages_failure_recovery(kctx,
-								  recover_vpfn,
-								  recover_vpfn +
-								  recover_count
-								  );
+				mmu_insert_pages_failure_recovery(kctx->kbdev,
+						&kctx->mmu,
+						recover_vpfn,
+						recover_vpfn + recover_count);
 			}
 			goto fail_unlock;
 		}
@@ -897,11 +1148,10 @@
 			if (recover_required) {
 				/* Invalidate the pages we have partially
 				 * completed */
-				mmu_insert_pages_failure_recovery(kctx,
-								  recover_vpfn,
-								  recover_vpfn +
-								  recover_count
-								  );
+				mmu_insert_pages_failure_recovery(kctx->kbdev,
+						&kctx->mmu,
+						recover_vpfn,
+						recover_vpfn + recover_count);
 			}
 			err = -ENOMEM;
 			goto fail_unlock;
@@ -932,30 +1182,38 @@
 		recover_required = true;
 		recover_count += count;
 	}
-	mutex_unlock(&kctx->mmu_lock);
+	mutex_unlock(&kctx->mmu.mmu_lock);
 	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
 	return 0;
 
 fail_unlock:
-	mutex_unlock(&kctx->mmu_lock);
+	mutex_unlock(&kctx->mmu.mmu_lock);
 	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
 	return err;
 }
 
-static inline void cleanup_empty_pte(struct kbase_context *kctx, u64 *pte)
+static inline void cleanup_empty_pte(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut, u64 *pte)
 {
 	phys_addr_t tmp_pgd;
 	struct page *tmp_p;
 
-	tmp_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(*pte);
+	tmp_pgd = kbdev->mmu_mode->pte_to_phy_addr(*pte);
 	tmp_p = phys_to_page(tmp_pgd);
-	kbase_mem_pool_free(&kctx->mem_pool, tmp_p, false);
-	kbase_process_page_usage_dec(kctx, 1);
-	kbase_atomic_sub_pages(1, &kctx->used_pages);
-	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+	kbase_mem_pool_free(&kbdev->mem_pool, tmp_p, false);
+
+	/* If the MMU tables belong to a context then we accounted the memory
+	 * usage to that context, so decrement here.
+	 */
+	if (mmut->kctx) {
+		kbase_process_page_usage_dec(mmut->kctx, 1);
+		kbase_atomic_sub_pages(1, &mmut->kctx->used_pages);
+	}
+	kbase_atomic_sub_pages(1, &kbdev->memdev.used_pages);
 }
 
-int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx,
+int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
+				    struct kbase_mmu_table *mmut,
 				    const u64 start_vpfn,
 				    struct tagged_addr *phys, size_t nr,
 				    unsigned long flags)
@@ -967,18 +1225,17 @@
 	int err;
 	struct kbase_mmu_mode const *mmu_mode;
 
-	KBASE_DEBUG_ASSERT(kctx);
-	KBASE_DEBUG_ASSERT(start_vpfn);
+	/* Note that 0 is a valid start_vpfn */
 	/* 64-bit address range is the max */
 	KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
 
-	mmu_mode = kctx->kbdev->mmu_mode;
+	mmu_mode = kbdev->mmu_mode;
 
 	/* Early out if there is nothing to do */
 	if (nr == 0)
 		return 0;
 
-	mutex_lock(&kctx->mmu_lock);
+	mutex_lock(&mmut->mmu_lock);
 
 	while (remain) {
 		unsigned int i;
@@ -1003,28 +1260,27 @@
 		 * 256 pages at once (on average). Do we really care?
 		 */
 		do {
-			err = mmu_get_pgd_at_level(kctx, insert_vpfn, cur_level,
-						   &pgd);
+			err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn,
+						   cur_level, &pgd);
 			if (err != -ENOMEM)
 				break;
 			/* Fill the memory pool with enough pages for
 			 * the page walk to succeed
 			 */
-			mutex_unlock(&kctx->mmu_lock);
-			err = kbase_mem_pool_grow(&kctx->mem_pool,
+			mutex_unlock(&mmut->mmu_lock);
+			err = kbase_mem_pool_grow(&kbdev->mem_pool,
 					cur_level);
-			mutex_lock(&kctx->mmu_lock);
+			mutex_lock(&mmut->mmu_lock);
 		} while (!err);
 
 		if (err) {
-			dev_warn(kctx->kbdev->dev,
+			dev_warn(kbdev->dev,
 				 "%s: mmu_get_bottom_pgd failure\n", __func__);
 			if (insert_vpfn != start_vpfn) {
 				/* Invalidate the pages we have partially
 				 * completed */
-				mmu_insert_pages_failure_recovery(kctx,
-								  start_vpfn,
-								  insert_vpfn);
+				mmu_insert_pages_failure_recovery(kbdev,
+						mmut, start_vpfn, insert_vpfn);
 			}
 			goto fail_unlock;
 		}
@@ -1032,14 +1288,13 @@
 		p = pfn_to_page(PFN_DOWN(pgd));
 		pgd_page = kmap(p);
 		if (!pgd_page) {
-			dev_warn(kctx->kbdev->dev, "%s: kmap failure\n",
+			dev_warn(kbdev->dev, "%s: kmap failure\n",
 				 __func__);
 			if (insert_vpfn != start_vpfn) {
 				/* Invalidate the pages we have partially
 				 * completed */
-				mmu_insert_pages_failure_recovery(kctx,
-								  start_vpfn,
-								  insert_vpfn);
+				mmu_insert_pages_failure_recovery(kbdev,
+						mmut, start_vpfn, insert_vpfn);
 			}
 			err = -ENOMEM;
 			goto fail_unlock;
@@ -1050,7 +1305,7 @@
 			u64 *target = &pgd_page[level_index];
 
 			if (mmu_mode->pte_is_valid(*target, cur_level))
-				cleanup_empty_pte(kctx, target);
+				cleanup_empty_pte(kbdev, mmut, target);
 			mmu_mode->entry_set_ate(target, *phys, flags,
 						cur_level);
 		} else {
@@ -1058,18 +1313,16 @@
 				unsigned int ofs = vindex + i;
 				u64 *target = &pgd_page[ofs];
 
-				/* Fail if the current page is a valid ATE entry
-				 * unless gwt_was_enabled as in that case all
-				 * pages will be valid from when
-				 * kbase_gpu_gwt_start() cleared the gpu
-				 * write flag.
+				/* Warn if the current page is a valid ATE
+				 * entry. The page table shouldn't have anything
+				 * in the place where we are trying to put a
+				 * new entry. Modifications to page table
+				 * entries should be performed with
+				 * kbase_mmu_update_pages().
 				 */
-#ifdef CONFIG_MALI_JOB_DUMP
-				if (!kctx->gwt_was_enabled)
-#endif
-					KBASE_DEBUG_ASSERT
-						(0 == (*target & 1UL));
-				kctx->kbdev->mmu_mode->entry_set_ate(target,
+				WARN_ON((*target & 1UL) != 0);
+
+				kbdev->mmu_mode->entry_set_ate(target,
 						phys[i], flags, cur_level);
 			}
 		}
@@ -1078,32 +1331,39 @@
 		insert_vpfn += count;
 		remain -= count;
 
-		kbase_mmu_sync_pgd(kctx->kbdev,
+		kbase_mmu_sync_pgd(kbdev,
 				kbase_dma_addr(p) + (vindex * sizeof(u64)),
 				count * sizeof(u64));
 
 		kunmap(p);
 	}
 
-	mutex_unlock(&kctx->mmu_lock);
-	return 0;
+	err = 0;
 
 fail_unlock:
-	mutex_unlock(&kctx->mmu_lock);
+	mutex_unlock(&mmut->mmu_lock);
 	return err;
 }
 
 /*
- * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space
+ * number 'as_nr'.
  */
-int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
-				  struct tagged_addr *phys, size_t nr,
-				  unsigned long flags)
+int kbase_mmu_insert_pages(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut, u64 vpfn,
+		struct tagged_addr *phys, size_t nr,
+		unsigned long flags, int as_nr)
 {
 	int err;
 
-	err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags);
-	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn,
+			phys, nr, flags);
+
+	if (mmut->kctx)
+		kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false);
+	else
+		kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false, as_nr);
+
 	return err;
 }
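Illustrative sketch (not part of the patch): with the reworked signature, a
context-owned mapping passes the context's own table and address space number,
which matches what the flush path above resolves from mmut->kctx. The helper
name is hypothetical.

static int example_map_into_ctx(struct kbase_context *kctx, u64 vpfn,
		struct tagged_addr *phys, size_t nr, unsigned long flags)
{
	return kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, vpfn,
			phys, nr, flags, kctx->as_nr);
}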
 
@@ -1138,7 +1398,7 @@
 
 	err = kbase_mmu_hw_do_operation(kbdev,
 				&kbdev->as[kctx->as_nr],
-				kctx, vpfn, nr, op, 0);
+				vpfn, nr, op, 0);
 #if KBASE_GPU_RESET_EN
 	if (err) {
 		/* Flush failed to complete, assume the
@@ -1163,14 +1423,83 @@
 #endif /* !CONFIG_MALI_NO_MALI */
 }
 
+/* Perform a flush/invalidate on a particular address space
+ */
+static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev,
+		struct kbase_as *as,
+		u64 vpfn, size_t nr, bool sync, bool drain_pending)
+{
+	int err;
+	u32 op;
+
+	if (kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		/* GPU is off so there's no need to perform flush/invalidate */
+		return;
+	}
+
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+
+	if (sync)
+		op = AS_COMMAND_FLUSH_MEM;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	err = kbase_mmu_hw_do_operation(kbdev,
+			as, vpfn, nr, op, 0);
+
+#if KBASE_GPU_RESET_EN
+	if (err) {
+		/* Flush failed to complete, assume the GPU has hung and
+		 * perform a reset to recover
+		 */
+		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+
+		if (kbase_prepare_to_reset_gpu(kbdev))
+			kbase_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+
+#ifndef CONFIG_MALI_NO_MALI
+	/*
+	 * The transaction lock must be dropped before here
+	 * as kbase_wait_write_flush could take it if
+	 * the GPU was powered down (static analysis doesn't
+	 * know this can't happen).
+	 */
+	drain_pending |= (!err) && sync &&
+		kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367);
+	if (drain_pending) {
+		/* Wait for GPU to flush write buffer */
+		kbase_wait_write_flush(kbdev);
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+	kbase_pm_context_idle(kbdev);
+}
+
+static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev,
+		u64 vpfn, size_t nr, bool sync, int as_nr)
+{
+	/* Skip if there is nothing to do */
+	if (nr) {
+		kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], vpfn,
+					nr, sync, false);
+	}
+}
+
 static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
 		u64 vpfn, size_t nr, bool sync)
 {
 	struct kbase_device *kbdev;
 	bool ctx_is_in_runpool;
-#ifndef CONFIG_MALI_NO_MALI
 	bool drain_pending = false;
 
+#ifndef CONFIG_MALI_NO_MALI
 	if (atomic_xchg(&kctx->drain_pending, 0))
 		drain_pending = true;
 #endif /* !CONFIG_MALI_NO_MALI */
@@ -1187,71 +1516,22 @@
 	if (ctx_is_in_runpool) {
 		KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
 
-		if (!kbase_pm_context_active_handle_suspend(kbdev,
-			KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
-			int err;
-			u32 op;
+		kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr],
+				vpfn, nr, sync, drain_pending);
 
-			/* AS transaction begin */
-			mutex_lock(&kbdev->mmu_hw_mutex);
-
-			if (sync)
-				op = AS_COMMAND_FLUSH_MEM;
-			else
-				op = AS_COMMAND_FLUSH_PT;
-
-			err = kbase_mmu_hw_do_operation(kbdev,
-						&kbdev->as[kctx->as_nr],
-						kctx, vpfn, nr, op, 0);
-
-#if KBASE_GPU_RESET_EN
-			if (err) {
-				/* Flush failed to complete, assume the
-				 * GPU has hung and perform a reset to
-				 * recover */
-				dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
-
-				if (kbase_prepare_to_reset_gpu(kbdev))
-					kbase_reset_gpu(kbdev);
-			}
-#endif /* KBASE_GPU_RESET_EN */
-
-			mutex_unlock(&kbdev->mmu_hw_mutex);
-			/* AS transaction end */
-
-#ifndef CONFIG_MALI_NO_MALI
-			/*
-			 * The transaction lock must be dropped before here
-			 * as kbase_wait_write_flush could take it if
-			 * the GPU was powered down (static analysis doesn't
-			 * know this can't happen).
-			 */
-			drain_pending |= (!err) && sync &&
-					kbase_hw_has_issue(kctx->kbdev,
-							BASE_HW_ISSUE_6367);
-			if (drain_pending) {
-				/* Wait for GPU to flush write buffer */
-				kbase_wait_write_flush(kctx);
-			}
-#endif /* !CONFIG_MALI_NO_MALI */
-
-			kbase_pm_context_idle(kbdev);
-		}
 		kbasep_js_runpool_release_ctx(kbdev, kctx);
 	}
 }
 
-void kbase_mmu_update(struct kbase_context *kctx)
+void kbase_mmu_update(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut,
+		int as_nr)
 {
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
-	lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
-	/* ASSERT that the context has a valid as_nr, which is only the case
-	 * when it's scheduled in.
-	 *
-	 * as_nr won't change because the caller has the hwaccess_lock */
-	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID);
 
-	kctx->kbdev->mmu_mode->update(kctx);
+	kbdev->mmu_mode->update(kbdev, mmut, as_nr);
 }
 KBASE_EXPORT_TEST_API(kbase_mmu_update);
 
@@ -1298,24 +1578,22 @@
  * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
  * information.
  */
-int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
+	struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr)
 {
 	phys_addr_t pgd;
 	size_t requested_nr = nr;
 	struct kbase_mmu_mode const *mmu_mode;
 	int err = -EFAULT;
 
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-	beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
-
 	if (0 == nr) {
 		/* early out if nothing to do */
 		return 0;
 	}
 
-	mutex_lock(&kctx->mmu_lock);
+	mutex_lock(&mmut->mmu_lock);
 
-	mmu_mode = kctx->kbdev->mmu_mode;
+	mmu_mode = kbdev->mmu_mode;
 
 	while (nr) {
 		unsigned int i;
@@ -1329,7 +1607,7 @@
 			count = nr;
 
 		/* need to check if this is a 2MB or a 4kB page */
-		pgd = kctx->pgd;
+		pgd = mmut->pgd;
 
 		for (level = MIDGARD_MMU_TOPLEVEL;
 				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
@@ -1367,7 +1645,7 @@
 		switch (level) {
 		case MIDGARD_MMU_LEVEL(0):
 		case MIDGARD_MMU_LEVEL(1):
-			dev_warn(kctx->kbdev->dev,
+			dev_warn(kbdev->dev,
 				 "%s: No support for ATEs at level %d\n",
 				 __func__, level);
 			kunmap(phys_to_page(pgd));
@@ -1377,7 +1655,7 @@
 			if (count >= 512) {
 				pcount = 1;
 			} else {
-				dev_warn(kctx->kbdev->dev,
+				dev_warn(kbdev->dev,
 					 "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n",
 					 __func__, count);
 				pcount = 0;
@@ -1388,7 +1666,7 @@
 			pcount = count;
 			break;
 		default:
-			dev_err(kctx->kbdev->dev,
+			dev_err(kbdev->dev,
 				"%s: found non-mapped memory, early out\n",
 				__func__);
 			vpfn += count;
@@ -1400,7 +1678,7 @@
 		for (i = 0; i < pcount; i++)
 			mmu_mode->entry_invalidate(&page[index + i]);
 
-		kbase_mmu_sync_pgd(kctx->kbdev,
+		kbase_mmu_sync_pgd(kbdev,
 				   kbase_dma_addr(phys_to_page(pgd)) +
 				   8 * index, 8*pcount);
 
@@ -1411,26 +1689,35 @@
 	}
 	err = 0;
 out:
-	mutex_unlock(&kctx->mmu_lock);
-	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	mutex_unlock(&mmut->mmu_lock);
+
+	if (mmut->kctx)
+		kbase_mmu_flush_invalidate(mmut->kctx, vpfn, requested_nr, true);
+	else
+		kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, requested_nr, true, as_nr);
+
 	return err;
 }
 
 KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
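
kbase_mmu_teardown_pages() likewise operates on a kbase_mmu_table plus an address-space number for the device-owned case. A hedged sketch of a context-side caller follows; the wrapper and argument names are illustrative only.

/* Sketch only: unmap a range from a context's own page tables. For
 * context-owned tables the flush path above uses mmut->kctx, while the
 * as_nr argument matters for tables with no context (mmut->kctx == NULL).
 */
static int example_unmap_range(struct kbase_context *kctx,
			       u64 start_vpfn, size_t nr_pages)
{
	return kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
					start_vpfn, nr_pages, kctx->as_nr);
}
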
 
 /**
- * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
- * This call is being triggered as a response to the changes of the mem attributes
+ * kbase_mmu_update_pages_no_flush() - Update page table entries on the GPU
  *
- * @pre : The caller is responsible for validating the memory attributes
+ * This will update page table entries that already exist on the GPU, based
+ * on the new flags that are passed. It is used in response to changes of
+ * the memory attributes.
  *
- * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
- * currently scheduled into the runpool, and so potentially uses a lot of locks.
- * These locks must be taken in the correct order with respect to others
- * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
- * information.
+ * The caller is responsible for validating the memory attributes.
+ *
+ * @kctx:  Kbase context
+ * @vpfn:  Virtual PFN (Page Frame Number) of the first page to update
+ * @phys:  Tagged physical addresses of the physical pages to replace the
+ *         current mappings
+ * @nr:    Number of pages to update
+ * @flags: New flags to apply to the page table entries
  */
-int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
 					struct tagged_addr *phys, size_t nr,
 					unsigned long flags)
 {
@@ -1440,14 +1727,13 @@
 	int err;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
-	KBASE_DEBUG_ASSERT(0 != vpfn);
 	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
 
 	/* Early out if there is nothing to do */
 	if (nr == 0)
 		return 0;
 
-	mutex_lock(&kctx->mmu_lock);
+	mutex_lock(&kctx->mmu.mmu_lock);
 
 	mmu_mode = kctx->kbdev->mmu_mode;
 
@@ -1461,16 +1747,17 @@
 			count = nr;
 
 		do {
-			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			err = mmu_get_bottom_pgd(kctx->kbdev, &kctx->mmu,
+					vpfn, &pgd);
 			if (err != -ENOMEM)
 				break;
 			/* Fill the memory pool with enough pages for
 			 * the page walk to succeed
 			 */
-			mutex_unlock(&kctx->mmu_lock);
-			err = kbase_mem_pool_grow(&kctx->mem_pool,
+			mutex_unlock(&kctx->mmu.mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->kbdev->mem_pool,
 					MIDGARD_MMU_BOTTOMLEVEL);
-			mutex_lock(&kctx->mmu_lock);
+			mutex_lock(&kctx->mmu.mmu_lock);
 		} while (!err);
 		if (err) {
 			dev_warn(kctx->kbdev->dev,
@@ -1501,11 +1788,11 @@
 		kunmap(pfn_to_page(PFN_DOWN(pgd)));
 	}
 
-	mutex_unlock(&kctx->mmu_lock);
+	mutex_unlock(&kctx->mmu.mmu_lock);
 	return 0;
 
 fail_unlock:
-	mutex_unlock(&kctx->mmu_lock);
+	mutex_unlock(&kctx->mmu.mmu_lock);
 	return err;
 }
 
@@ -1520,8 +1807,9 @@
 	return err;
 }
 
-static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd,
-			       int level, u64 *pgd_page_buffer)
+static void mmu_teardown_level(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut, phys_addr_t pgd,
+		int level, u64 *pgd_page_buffer)
 {
 	phys_addr_t target_pgd;
 	struct page *p;
@@ -1529,9 +1817,7 @@
 	int i;
 	struct kbase_mmu_mode const *mmu_mode;
 
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-	lockdep_assert_held(&kctx->mmu_lock);
-	lockdep_assert_held(&kctx->reg_lock);
+	lockdep_assert_held(&mmut->mmu_lock);
 
 	pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
 	/* kmap_atomic should NEVER fail. */
@@ -1542,14 +1828,14 @@
 	kunmap_atomic(pgd_page);
 	pgd_page = pgd_page_buffer;
 
-	mmu_mode = kctx->kbdev->mmu_mode;
+	mmu_mode = kbdev->mmu_mode;
 
 	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
 		target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
 
 		if (target_pgd) {
 			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
-				mmu_teardown_level(kctx,
+				mmu_teardown_level(kbdev, mmut,
 						   target_pgd,
 						   level + 1,
 						   pgd_page_buffer +
@@ -1559,56 +1845,69 @@
 	}
 
 	p = pfn_to_page(PFN_DOWN(pgd));
-	kbase_mem_pool_free(&kctx->mem_pool, p, true);
-	kbase_process_page_usage_dec(kctx, 1);
-	kbase_atomic_sub_pages(1, &kctx->used_pages);
-	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+	kbase_mem_pool_free(&kbdev->mem_pool, p, true);
+	kbase_atomic_sub_pages(1, &kbdev->memdev.used_pages);
+
+	/* If the MMU tables belong to a context, the pages will have been
+	 * accounted against it, so we must decrement the usage counts here.
+	 */
+	if (mmut->kctx) {
+		kbase_process_page_usage_dec(mmut->kctx, 1);
+		kbase_atomic_sub_pages(1, &mmut->kctx->used_pages);
+	}
 }
 
-int kbase_mmu_init(struct kbase_context *kctx)
+int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+		struct kbase_context *kctx)
 {
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-	KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
-
-	mutex_init(&kctx->mmu_lock);
+	mutex_init(&mmut->mmu_lock);
+	mmut->kctx = kctx;
 
 	/* Preallocate MMU depth of four pages for mmu_teardown_level to use */
-	kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+	mmut->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
 
-	if (NULL == kctx->mmu_teardown_pages)
+	if (mmut->mmu_teardown_pages == NULL)
 		return -ENOMEM;
 
+	mmut->pgd = 0;
+	/* Grow the kbdev memory pool first so that kbase_mmu_alloc_pgd()
+	 * can allocate out of it. This avoids kernel allocations happening
+	 * while mmu_lock is held.
+	 */
+	while (!mmut->pgd) {
+		int err;
+
+		err = kbase_mem_pool_grow(&kbdev->mem_pool,
+				MIDGARD_MMU_BOTTOMLEVEL);
+		if (err) {
+			kbase_mmu_term(kbdev, mmut);
+			return -ENOMEM;
+		}
+
+		mutex_lock(&mmut->mmu_lock);
+		mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
+		mutex_unlock(&mmut->mmu_lock);
+	}
+
 	return 0;
 }
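
kbase_mmu_init() above uses a grow-then-allocate pattern: the pool is refilled with no MMU lock held, and the locked step only draws from the already-filled pool. The same idea restated as a compact sketch; the helper name is hypothetical.

/* Sketch of the pattern used by kbase_mmu_init() above: never allocate
 * from the kernel while mmu_lock is held; refill first, then take the
 * lock only to carve a PGD out of the pool.
 */
static phys_addr_t example_alloc_pgd(struct kbase_device *kbdev,
				     struct kbase_mmu_table *mmut)
{
	phys_addr_t pgd = 0;

	while (!pgd) {
		/* May sleep/allocate: done outside the lock. */
		if (kbase_mem_pool_grow(&kbdev->mem_pool,
					MIDGARD_MMU_BOTTOMLEVEL))
			return 0;

		/* Lock held only for the pool-backed allocation. */
		mutex_lock(&mmut->mmu_lock);
		pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
		mutex_unlock(&mmut->mmu_lock);
	}

	return pgd;
}
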
 
-void kbase_mmu_term(struct kbase_context *kctx)
+void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
 {
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+	if (mmut->pgd) {
+		mutex_lock(&mmut->mmu_lock);
+		mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL,
+				mmut->mmu_teardown_pages);
+		mutex_unlock(&mmut->mmu_lock);
 
-	kfree(kctx->mmu_teardown_pages);
-	kctx->mmu_teardown_pages = NULL;
+		if (mmut->kctx)
+			KBASE_TLSTREAM_AUX_PAGESALLOC(mmut->kctx->id, 0);
+	}
+
+	kfree(mmut->mmu_teardown_pages);
+	mutex_destroy(&mmut->mmu_lock);
 }
 
-void kbase_mmu_free_pgd(struct kbase_context *kctx)
-{
-	int new_page_count = 0;
-
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
-
-	mutex_lock(&kctx->mmu_lock);
-	mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL,
-			   kctx->mmu_teardown_pages);
-	mutex_unlock(&kctx->mmu_lock);
-
-	KBASE_TLSTREAM_AUX_PAGESALLOC(
-			kctx->id,
-			(u64)new_page_count);
-}
-
-KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
-
 static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
 {
 	phys_addr_t target_pgd;
@@ -1619,7 +1918,7 @@
 	struct kbase_mmu_mode const *mmu_mode;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
-	lockdep_assert_held(&kctx->mmu_lock);
+	lockdep_assert_held(&kctx->mmu.mmu_lock);
 
 	mmu_mode = kctx->kbdev->mmu_mode;
 
@@ -1684,7 +1983,7 @@
 	KBASE_DEBUG_ASSERT(0 != size_left);
 	kaddr = vmalloc_user(size_left);
 
-	mutex_lock(&kctx->mmu_lock);
+	mutex_lock(&kctx->mmu.mmu_lock);
 
 	if (kaddr) {
 		u64 end_marker = 0xFFULL;
@@ -1699,7 +1998,8 @@
 		if (kctx->api_version >= KBASE_API_VERSION(8, 4)) {
 			struct kbase_mmu_setup as_setup;
 
-			kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup);
+			kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu,
+					&as_setup);
 			config[0] = as_setup.transtab;
 			config[1] = as_setup.memattr;
 			config[2] = as_setup.transcfg;
@@ -1710,7 +2010,7 @@
 		}
 
 		dump_size = kbasep_mmu_dump_level(kctx,
-				kctx->pgd,
+				kctx->mmu.pgd,
 				MIDGARD_MMU_TOPLEVEL,
 				&mmu_dump_buffer,
 				&size_left);
@@ -1732,12 +2032,12 @@
 		memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
 	}
 
-	mutex_unlock(&kctx->mmu_lock);
+	mutex_unlock(&kctx->mmu.mmu_lock);
 	return kaddr;
 
 fail_free:
 	vfree(kaddr);
-	mutex_unlock(&kctx->mmu_lock);
+	mutex_unlock(&kctx->mmu.mmu_lock);
 	return NULL;
 }
 KBASE_EXPORT_TEST_API(kbase_mmu_dump);
@@ -1767,11 +2067,10 @@
 		return;
 	}
 
-	if (unlikely(faulting_as->protected_mode))
-	{
+	if (unlikely(faulting_as->protected_mode)) {
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
 				"Permission failure");
-		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
 				KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
 		kbasep_js_runpool_release_ctx(kbdev, kctx);
 		atomic_dec(&kbdev->faults_pending);
@@ -1805,9 +2104,9 @@
 		mutex_unlock(&kbdev->mmu_hw_mutex);
 		/* AS transaction end */
 
-		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
 					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
-		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
 					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
 
 		kbase_pm_context_idle(kbdev);
@@ -2102,9 +2401,9 @@
 	mutex_unlock(&kbdev->mmu_hw_mutex);
 	/* AS transaction end */
 	/* Clear down the fault */
-	kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+	kbase_mmu_hw_clear_fault(kbdev, as,
 			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
-	kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+	kbase_mmu_hw_enable_fault(kbdev, as,
 			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
 
 #if KBASE_GPU_RESET_EN
@@ -2117,7 +2416,6 @@
 {
 	struct kbase_as *as;
 	struct kbase_device *kbdev;
-	struct kbase_context *kctx;
 	unsigned long flags;
 
 	KBASE_DEBUG_ASSERT(work);
@@ -2133,12 +2431,11 @@
 	 * the AS will not be released as before the atom is released this workqueue
 	 * is flushed (in kbase_as_poking_timer_release_atom)
 	 */
-	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
 
 	/* AS transaction begin */
 	mutex_lock(&kbdev->mmu_hw_mutex);
 	/* Force a uTLB invalidate */
-	kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
+	kbase_mmu_hw_do_operation(kbdev, as, 0, 0,
 				  AS_COMMAND_UNLOCK, 0);
 	mutex_unlock(&kbdev->mmu_hw_mutex);
 	/* AS transaction end */
@@ -2286,14 +2583,14 @@
 		WARN_ON(as->current_setup.transtab);
 
 		if (kbase_as_has_bus_fault(as)) {
-			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+			kbase_mmu_hw_clear_fault(kbdev, as,
 					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
-			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+			kbase_mmu_hw_enable_fault(kbdev, as,
 					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
 		} else if (kbase_as_has_page_fault(as)) {
-			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+			kbase_mmu_hw_clear_fault(kbdev, as,
 					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
-			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+			kbase_mmu_hw_enable_fault(kbdev, as,
 					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
 		}
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
index 92aa55d..70d5f2b 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -70,10 +70,9 @@
  *
  * @param[in]  kbdev          kbase device to configure.
  * @param[in]  as             address space to configure.
- * @param[in]  kctx           kbase context to configure.
  */
 void kbase_mmu_hw_configure(struct kbase_device *kbdev,
-		struct kbase_as *as, struct kbase_context *kctx);
+		struct kbase_as *as);
 
 /** @brief Issue an operation to the MMU.
  *
@@ -82,7 +81,6 @@
  *
  * @param[in]  kbdev         kbase device to issue the MMU operation on.
  * @param[in]  as            address space to issue the MMU operation on.
- * @param[in]  kctx          kbase context to issue the MMU operation on.
  * @param[in]  vpfn          MMU Virtual Page Frame Number to start the
  *                           operation on.
  * @param[in]  nr            Number of pages to work on.
@@ -93,7 +91,7 @@
  * @return Zero if the operation was successful, non-zero otherwise.
  */
 int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
-		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type,
+		u64 vpfn, u32 nr, u32 type,
 		unsigned int handling_irq);
 
 /** @brief Clear a fault that has been previously reported by the MMU.
@@ -102,11 +100,10 @@
  *
  * @param[in]  kbdev         kbase device to  clear the fault from.
  * @param[in]  as            address space to  clear the fault from.
- * @param[in]  kctx          kbase context to clear the fault from or NULL.
  * @param[in]  type          The type of fault that needs to be cleared.
  */
 void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
-		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+		enum kbase_mmu_fault_type type);
 
 /** @brief Enable fault that has been previously reported by the MMU.
  *
@@ -116,11 +113,10 @@
  *
  * @param[in]  kbdev         kbase device to again enable the fault from.
  * @param[in]  as            address space to again enable the fault from.
- * @param[in]  kctx          kbase context to again enable the fault from.
  * @param[in]  type          The type of fault that needs to be enabled again.
  */
 void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
-		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+		enum kbase_mmu_fault_type type);
 
 /** @} *//* end group mali_kbase_mmu_hw */
 /** @} *//* end group base_kbase_api */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
index 4bb2628..38ca456 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2014, 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2014, 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -21,7 +21,6 @@
  */
 
 
-
 #include "mali_kbase.h"
 #include "mali_midg_regmap.h"
 #include "mali_kbase_defs.h"
@@ -48,32 +47,28 @@
  */
 static inline void page_table_entry_set(u64 *pte, u64 phy)
 {
+#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE
+	WRITE_ONCE(*pte, phy);
+#else
 #ifdef CONFIG_64BIT
+	barrier();
 	*pte = phy;
+	barrier();
 #elif defined(CONFIG_ARM)
-	/*
-	 * In order to prevent the compiler keeping cached copies of
-	 * memory, we have to explicitly say that we have updated memory.
-	 *
-	 * Note: We could manually move the data ourselves into R0 and
-	 * R1 by specifying register variables that are explicitly
-	 * given registers assignments, the down side of this is that
-	 * we have to assume cpu endianness.  To avoid this we can use
-	 * the ldrd to read the data from memory into R0 and R1 which
-	 * will respect the cpu endianness, we then use strd to make
-	 * the 64 bit assignment to the page table entry.
-	 */
-	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
-			"strd r0, r1, [%[pte]]\n\t"
-			: "=m" (*pte)
-			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
-			: "r0", "r1");
+	barrier();
+	asm volatile("ldrd r0, [%1]\n\t"
+		     "strd r0, %0\n\t"
+		     : "=m" (*pte)
+		     : "r" (&phy)
+		     : "r0", "r1");
+	barrier();
 #else
 #error "64-bit atomic write must be implemented for your architecture"
 #endif
+#endif
 }
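
Every table-entry mutation in this file is funnelled through page_table_entry_set(), so the 64-bit store is done with WRITE_ONCE() (or the barrier()/ldrd-strd fallback on pre-3.18.13 kernels) rather than a plain assignment the compiler could cache, reorder or split. A usage sketch follows; ENTRY_IS_INVAL is assumed to be the invalid-entry encoding used elsewhere in this file.

/* Illustrative only: invalidating an entry goes through the helper so
 * the GPU never observes a partially written descriptor.
 */
static void example_entry_invalidate(u64 *entry)
{
	page_table_entry_set(entry, ENTRY_IS_INVAL);
}
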
 
-static void mmu_get_as_setup(struct kbase_context *kctx,
+static void mmu_get_as_setup(struct kbase_mmu_table *mmut,
 		struct kbase_mmu_setup * const setup)
 {
 	/* Set up the required caching policies at the correct indices
@@ -89,22 +84,30 @@
 		(AS_MEMATTR_AARCH64_OUTER_IMPL_DEF   <<
 			(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) |
 		(AS_MEMATTR_AARCH64_OUTER_WA         <<
-			(AS_MEMATTR_INDEX_OUTER_WA * 8));
+			(AS_MEMATTR_INDEX_OUTER_WA * 8)) |
+		(AS_MEMATTR_AARCH64_NON_CACHEABLE    <<
+			(AS_MEMATTR_INDEX_NON_CACHEABLE * 8));
 
-	setup->transtab = (u64)kctx->pgd & AS_TRANSTAB_BASE_MASK;
+	setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK;
 	setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K;
 }
 
-static void mmu_update(struct kbase_context *kctx)
+static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+		int as_nr)
 {
-	struct kbase_device * const kbdev = kctx->kbdev;
-	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
-	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+	struct kbase_as *as;
+	struct kbase_mmu_setup *current_setup;
 
-	mmu_get_as_setup(kctx, current_setup);
+	if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID))
+		return;
+
+	as = &kbdev->as[as_nr];
+	current_setup = &as->current_setup;
+
+	mmu_get_as_setup(mmut, current_setup);
 
 	/* Apply the address space setting */
-	kbase_mmu_hw_configure(kbdev, as, kctx);
+	kbase_mmu_hw_configure(kbdev, as);
 }
 
 static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
@@ -116,7 +119,7 @@
 	current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED;
 
 	/* Apply the address space setting */
-	kbase_mmu_hw_configure(kbdev, as, NULL);
+	kbase_mmu_hw_configure(kbdev, as);
 }
 
 static phys_addr_t pte_to_phy_addr(u64 entry)
@@ -210,7 +213,8 @@
 	.pte_is_valid = pte_is_valid,
 	.entry_set_ate = entry_set_ate,
 	.entry_set_pte = entry_set_pte,
-	.entry_invalidate = entry_invalidate
+	.entry_invalidate = entry_invalidate,
+	.flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE
 };
 
 struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void)
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
index bc8da63..f6bdf91d 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -21,7 +21,6 @@
  */
 
 
-
 #include "mali_kbase.h"
 #include "mali_midg_regmap.h"
 #include "mali_kbase_defs.h"
@@ -46,33 +45,28 @@
  */
 static inline void page_table_entry_set(u64 *pte, u64 phy)
 {
+#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE
+	WRITE_ONCE(*pte, phy);
+#else
 #ifdef CONFIG_64BIT
+	barrier();
 	*pte = phy;
+	barrier();
 #elif defined(CONFIG_ARM)
-	/*
-	 * In order to prevent the compiler keeping cached copies of
-	 * memory, we have to explicitly say that we have updated
-	 * memory.
-	 *
-	 * Note: We could manually move the data ourselves into R0 and
-	 * R1 by specifying register variables that are explicitly
-	 * given registers assignments, the down side of this is that
-	 * we have to assume cpu endianness.  To avoid this we can use
-	 * the ldrd to read the data from memory into R0 and R1 which
-	 * will respect the cpu endianness, we then use strd to make
-	 * the 64 bit assignment to the page table entry.
-	 */
-	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
-			"strd r0, r1, [%[pte]]\n\t"
-			: "=m" (*pte)
-			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
-			: "r0", "r1");
+	barrier();
+	asm volatile("ldrd r0, [%1]\n\t"
+		     "strd r0, %0\n\t"
+		     : "=m" (*pte)
+		     : "r" (&phy)
+		     : "r0", "r1");
+	barrier();
 #else
 #error "64-bit atomic write must be implemented for your architecture"
 #endif
+#endif
 }
 
-static void mmu_get_as_setup(struct kbase_context *kctx,
+static void mmu_get_as_setup(struct kbase_mmu_table *mmut,
 		struct kbase_mmu_setup * const setup)
 {
 	/* Set up the required caching policies at the correct indices
@@ -90,7 +84,7 @@
 		(AS_MEMATTR_INDEX_OUTER_WA * 8))              |
 		0; /* The other indices are unused for now */
 
-	setup->transtab = ((u64)kctx->pgd &
+	setup->transtab = ((u64)mmut->pgd &
 		((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
 		AS_TRANSTAB_LPAE_ADRMODE_TABLE |
 		AS_TRANSTAB_LPAE_READ_INNER;
@@ -98,16 +92,23 @@
 	setup->transcfg = 0;
 }
 
-static void mmu_update(struct kbase_context *kctx)
+static void mmu_update(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut,
+		int as_nr)
 {
-	struct kbase_device * const kbdev = kctx->kbdev;
-	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
-	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+	struct kbase_as *as;
+	struct kbase_mmu_setup *current_setup;
 
-	mmu_get_as_setup(kctx, current_setup);
+	if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID))
+		return;
+
+	as = &kbdev->as[as_nr];
+	current_setup = &as->current_setup;
+
+	mmu_get_as_setup(mmut, current_setup);
 
 	/* Apply the address space setting */
-	kbase_mmu_hw_configure(kbdev, as, kctx);
+	kbase_mmu_hw_configure(kbdev, as);
 }
 
 static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
@@ -118,7 +119,7 @@
 	current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
 
 	/* Apply the address space setting */
-	kbase_mmu_hw_configure(kbdev, as, NULL);
+	kbase_mmu_hw_configure(kbdev, as);
 }
 
 static phys_addr_t pte_to_phy_addr(u64 entry)
@@ -145,9 +146,17 @@
 static u64 get_mmu_flags(unsigned long flags)
 {
 	u64 mmu_flags;
+	unsigned long memattr_idx;
 
-	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
-	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+	memattr_idx = KBASE_REG_MEMATTR_VALUE(flags);
+	if (WARN(memattr_idx == AS_MEMATTR_INDEX_NON_CACHEABLE,
+			"Legacy Mode MMU cannot honor GPU non-cacheable memory, will use default instead\n"))
+		memattr_idx = AS_MEMATTR_INDEX_DEFAULT;
+	/* store mem_attr index as 4:2, noting that:
+	 * - macro called above ensures 3 bits already
+	 * - all AS_MEMATTR_INDEX_<...> macros only use 3 bits
+	 */
+	mmu_flags = memattr_idx << 2;
 
 	/* write perm if requested */
 	mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
@@ -195,7 +204,8 @@
 	.pte_is_valid = pte_is_valid,
 	.entry_set_ate = entry_set_ate,
 	.entry_set_pte = entry_set_pte,
-	.entry_invalidate = entry_invalidate
+	.entry_invalidate = entry_invalidate,
+	.flags = 0
 };
 
 struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
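
The aarch64 mode now advertises KBASE_MMU_MODE_HAS_NON_CACHEABLE while the LPAE mode keeps flags at 0, and get_mmu_flags() above downgrades a non-cacheable request to the default attribute with a warning. A hedged sketch of how allocation code could consult the mode flag first; choose_memattr() is a hypothetical helper.

/* Sketch only: prefer the non-cacheable attribute index when the active
 * MMU mode supports it, otherwise fall back to the default attribute
 * that LPAE would substitute anyway.
 */
static unsigned int choose_memattr(struct kbase_device *kbdev,
				   bool want_non_cacheable)
{
	if (want_non_cacheable &&
	    (kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE))
		return AS_MEMATTR_INDEX_NON_CACHEABLE;

	return AS_MEMATTR_INDEX_DEFAULT;
}
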
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.c b/drivers/gpu/arm/midgard/mali_kbase_pm.c
index da56f0a..d5b8c77 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_pm.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -52,18 +52,9 @@
 {
 	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
 	int c;
-	int old_count;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 
-	/* Trace timeline information about how long it took to handle the decision
-	 * to powerup. Sometimes the event might be missed due to reading the count
-	 * outside of mutex, but this is necessary to get the trace timing
-	 * correct. */
-	old_count = kbdev->pm.active_count;
-	if (old_count == 0)
-		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
-
 	mutex_lock(&js_devdata->runpool_mutex);
 	mutex_lock(&kbdev->pm.lock);
 	if (kbase_pm_is_suspending(kbdev)) {
@@ -75,8 +66,6 @@
 		case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
 			mutex_unlock(&kbdev->pm.lock);
 			mutex_unlock(&js_devdata->runpool_mutex);
-			if (old_count == 0)
-				kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
 			return 1;
 
 		case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
@@ -87,17 +76,17 @@
 		}
 	}
 	c = ++kbdev->pm.active_count;
-	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
 	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
 
-	/* Trace the event being handled */
-	if (old_count == 0)
-		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
-
-	if (c == 1)
+	if (c == 1) {
 		/* First context active: Power on the GPU and any cores requested by
 		 * the policy */
 		kbase_hwaccess_pm_gpu_active(kbdev);
+	}
+#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_DEVFREQ)
+	if (kbdev->ipa.gpu_active_callback)
+		kbdev->ipa.gpu_active_callback(kbdev->ipa.model_data);
+#endif
 
 	mutex_unlock(&kbdev->pm.lock);
 	mutex_unlock(&js_devdata->runpool_mutex);
@@ -111,31 +100,18 @@
 {
 	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
 	int c;
-	int old_count;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 
-	/* Trace timeline information about how long it took to handle the decision
-	 * to powerdown. Sometimes the event might be missed due to reading the
-	 * count outside of mutex, but this is necessary to get the trace timing
-	 * correct. */
-	old_count = kbdev->pm.active_count;
-	if (old_count == 0)
-		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
 
 	mutex_lock(&js_devdata->runpool_mutex);
 	mutex_lock(&kbdev->pm.lock);
 
 	c = --kbdev->pm.active_count;
-	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
 	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
 
 	KBASE_DEBUG_ASSERT(c >= 0);
 
-	/* Trace the event being handled */
-	if (old_count == 0)
-		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
-
 	if (c == 0) {
 		/* Last context has gone idle */
 		kbase_hwaccess_pm_gpu_idle(kbdev);
@@ -146,6 +122,21 @@
 		wake_up(&kbdev->pm.zero_active_count_wait);
 	}
 
+#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_DEVFREQ)
+	/* IPA may be using vinstr, in which case there may be one PM reference
+	 * still held when all other contexts have left the GPU. Inform IPA that
+	 * the GPU is now idle so that vinstr can drop its reference.
+	 *
+	 * If the GPU was only briefly active then it might have gone idle
+	 * before vinstr has taken a PM reference, meaning that active_count is
+	 * zero. We still need to inform IPA in this case, so that vinstr can
+	 * drop the PM reference and avoid keeping the GPU powered
+	 * unnecessarily.
+	 */
+	if (c <= 1 && kbdev->ipa.gpu_idle_callback)
+		kbdev->ipa.gpu_idle_callback(kbdev->ipa.model_data);
+#endif
+
 	mutex_unlock(&kbdev->pm.lock);
 	mutex_unlock(&js_devdata->runpool_mutex);
 }
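
The new IPA hooks pair up: gpu_active_callback fires when the first PM reference is taken, and gpu_idle_callback fires for c <= 1 because vinstr itself may hold the final reference and must be told to drop it. A hedged sketch of how a model might register the hooks; all model-side names and the void * parameter type are assumptions, only the kbdev->ipa fields come from this patch.

#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_DEVFREQ)
/* Hypothetical model-side handlers: the active hook may start counter
 * sampling and take a PM reference, the idle hook must release it so
 * the GPU can power off even when it was the last holder (count == 1).
 */
static void example_model_gpu_active(void *model_data)
{
	/* ... start/resume counter sampling, take PM reference ... */
}

static void example_model_gpu_idle(void *model_data)
{
	/* ... stop sampling and drop the PM reference ... */
}

static void example_register_ipa_hooks(struct kbase_device *kbdev,
				       void *model_data)
{
	kbdev->ipa.model_data = model_data;
	kbdev->ipa.gpu_active_callback = example_model_gpu_active;
	kbdev->ipa.gpu_idle_callback = example_model_gpu_idle;
}
#endif
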
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.h b/drivers/gpu/arm/midgard/mali_kbase_pm.h
index 8de17e1..59a0314 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_pm.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2015,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -136,6 +136,10 @@
  */
 void kbase_pm_context_idle(struct kbase_device *kbdev);
 
+/* NOTE: kbase_pm_is_active() is in mali_kbase.h, because it is an inline
+ * function
+ */
+
 /**
  * Suspend the GPU and prevent any further register accesses to it from Kernel
  * threads.
diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c
index 3d93922..92101fec 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_replay.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c
@@ -664,8 +664,8 @@
 	atom->prio = prio;
 	atom->atom_number = atom_nr;
 
-	base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID);
-	base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID);
+	base_jd_atom_dep_set(&atom->pre_dep[0], 0, BASE_JD_DEP_TYPE_INVALID);
+	base_jd_atom_dep_set(&atom->pre_dep[1], 0, BASE_JD_DEP_TYPE_INVALID);
 
 	atom->udata.blob[0] = 0;
 	atom->udata.blob[1] = 0;
@@ -713,7 +713,8 @@
 	kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio);
 	kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio);
 
-	base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA);
+	base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr,
+			     BASE_JD_DEP_TYPE_DATA);
 
 	return 0;
 }
diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
index 7cce3f8..b774c3b 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -375,12 +375,12 @@
 }
 #endif /* CONFIG_MALI_FENCE_DEBUG */
 
-void kbasep_soft_job_timeout_worker(unsigned long data)
+void kbasep_soft_job_timeout_worker(struct timer_list *timer)
 {
-	struct kbase_context *kctx = (struct kbase_context *)data;
+	struct kbase_context *kctx = container_of(timer, struct kbase_context,
+			soft_job_timeout);
 	u32 timeout_ms = (u32)atomic_read(
 			&kctx->kbdev->js_data.soft_job_timeout_ms);
-	struct timer_list *timer = &kctx->soft_job_timeout;
 	ktime_t cur_time = ktime_get();
 	bool restarting = false;
 	unsigned long lflags;
@@ -495,17 +495,6 @@
 		kbase_js_sched_all(katom->kctx->kbdev);
 }
 
-struct kbase_debug_copy_buffer {
-	size_t size;
-	struct page **pages;
-	int nr_pages;
-	size_t offset;
-	struct kbase_mem_phy_alloc *gpu_alloc;
-
-	struct page **extres_pages;
-	int nr_extres_pages;
-};
-
 static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer)
 {
 	struct page **pages = buffer->extres_pages;
@@ -691,13 +680,6 @@
 			ret = 0;
 			break;
 		}
-		case KBASE_MEM_TYPE_IMPORTED_UMP:
-		{
-			dev_warn(katom->kctx->kbdev->dev,
-					"UMP is not supported for debug_copy jobs\n");
-			ret = -EINVAL;
-			goto out_unlock;
-		}
 		default:
 			/* Nothing to be done. */
 			break;
@@ -720,7 +702,7 @@
 	return ret;
 }
 
-static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
 		void *extres_page, struct page **pages, unsigned int nr_pages,
 		unsigned int *target_page_nr, size_t offset, size_t *to_copy)
 {
@@ -762,7 +744,7 @@
 	kunmap(pages[*target_page_nr]);
 }
 
-static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+int kbase_mem_copy_from_extres(struct kbase_context *kctx,
 		struct kbase_debug_copy_buffer *buf_data)
 {
 	unsigned int i;
@@ -771,9 +753,11 @@
 	u64 offset = buf_data->offset;
 	size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE;
 	size_t to_copy = min(extres_size, buf_data->size);
-	size_t dma_to_copy;
 	struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc;
 	int ret = 0;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	size_t dma_to_copy;
+#endif
 
 	KBASE_DEBUG_ASSERT(pages != NULL);
 
@@ -872,50 +856,90 @@
 	return 0;
 }
 
+#define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7)
+
+int kbasep_jit_alloc_validate(struct kbase_context *kctx,
+					struct base_jit_alloc_info *info)
+{
+	/* If the ID is zero, then fail the job */
+	if (info->id == 0)
+		return -EINVAL;
+
+	/* Sanity check that the PA fits within the VA */
+	if (info->va_pages < info->commit_pages)
+		return -EINVAL;
+
+	/* Ensure the GPU address is correctly aligned */
+	if ((info->gpu_alloc_addr & KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT) != 0)
+		return -EINVAL;
+
+	if (kctx->jit_version == 1) {
+		/* Old JIT didn't have usage_id, max_allocations, bin_id
+		 * or padding, so force them to zero
+		 */
+		info->usage_id = 0;
+		info->max_allocations = 0;
+		info->bin_id = 0;
+		info->flags = 0;
+		memset(info->padding, 0, sizeof(info->padding));
+	} else {
+		int j;
+
+		/* Check padding is all zeroed */
+		for (j = 0; j < sizeof(info->padding); j++) {
+			if (info->padding[j] != 0)
+				return -EINVAL;
+		}
+
+		/* No bit other than TILER_ALIGN_TOP shall be set */
+		if (info->flags & ~BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)
+			return -EINVAL;
+	}
+
+	return 0;
+}
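
The mask above (0x7) means gpu_alloc_addr must be 8-byte aligned, since a 64-bit result address is written back to it once the allocation succeeds. A small example of a request that passes the checks; the values are illustrative only.

/* Example values only: id must be non-zero, commit_pages must fit in
 * va_pages, and gpu_alloc_addr must be 8-byte aligned (0x1004 would
 * fail the mask test and return -EINVAL).
 */
static int example_validate_one(struct kbase_context *kctx)
{
	struct base_jit_alloc_info info = {
		.id = 1,
		.va_pages = 16,
		.commit_pages = 4,
		.gpu_alloc_addr = 0x1000,
	};

	return kbasep_jit_alloc_validate(kctx, &info);
}
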
+
 static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
 {
 	__user void *data = (__user void *)(uintptr_t) katom->jc;
 	struct base_jit_alloc_info *info;
 	struct kbase_context *kctx = katom->kctx;
+	u32 count;
 	int ret;
+	u32 i;
 
-	/* Fail the job if there is no info structure */
-	if (!data) {
+	/* For backwards compatibility */
+	if (katom->nr_extres == 0)
+		katom->nr_extres = 1;
+	count = katom->nr_extres;
+
+	/* Sanity checks */
+	if (!data || count > kctx->jit_max_allocations ||
+			count > ARRAY_SIZE(kctx->jit_alloc)) {
 		ret = -EINVAL;
 		goto fail;
 	}
 
 	/* Copy the information for safe access and future storage */
-	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	info = kmalloc_array(count, sizeof(*info), GFP_KERNEL);
 	if (!info) {
 		ret = -ENOMEM;
 		goto fail;
 	}
-
-	if (copy_from_user(info, data, sizeof(*info)) != 0) {
+	if (copy_from_user(info, data, sizeof(*info)*count) != 0) {
 		ret = -EINVAL;
 		goto free_info;
 	}
-
-	/* If the ID is zero then fail the job */
-	if (info->id == 0) {
-		ret = -EINVAL;
-		goto free_info;
-	}
-
-	/* Sanity check that the PA fits within the VA */
-	if (info->va_pages < info->commit_pages) {
-		ret = -EINVAL;
-		goto free_info;
-	}
-
-	/* Ensure the GPU address is correctly aligned */
-	if ((info->gpu_alloc_addr & 0x7) != 0) {
-		ret = -EINVAL;
-		goto free_info;
-	}
-
 	katom->softjob_data = info;
+
+	for (i = 0; i < count; i++, info++) {
+		ret = kbasep_jit_alloc_validate(kctx, info);
+		if (ret)
+			goto free_info;
+	}
+
 	katom->jit_blocked = false;
 
 	lockdep_assert_held(&kctx->jctx.lock);
@@ -935,17 +959,38 @@
 	return 0;
 
 free_info:
-	kfree(info);
+	kfree(katom->softjob_data);
+	katom->softjob_data = NULL;
 fail:
 	return ret;
 }
 
-static u8 kbase_jit_free_get_id(struct kbase_jd_atom *katom)
+static u8 *kbase_jit_free_get_ids(struct kbase_jd_atom *katom)
 {
-	if (WARN_ON(katom->core_req != BASE_JD_REQ_SOFT_JIT_FREE))
-		return 0;
+	if (WARN_ON((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) !=
+				BASE_JD_REQ_SOFT_JIT_FREE))
+		return NULL;
 
-	return (u8) katom->jc;
+	return (u8 *) katom->softjob_data;
+}
+
+static void kbase_jit_add_to_pending_alloc_list(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct list_head *target_list_head = NULL;
+	struct kbase_jd_atom *entry;
+
+	list_for_each_entry(entry, &kctx->jit_pending_alloc, queue) {
+		if (katom->age < entry->age) {
+			target_list_head = &entry->queue;
+			break;
+		}
+	}
+
+	if (target_list_head == NULL)
+		target_list_head = &kctx->jit_pending_alloc;
+
+	list_add_tail(&katom->queue, target_list_head);
 }
 
 static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
@@ -955,6 +1000,8 @@
 	struct kbase_va_region *reg;
 	struct kbase_vmap_struct mapping;
 	u64 *ptr, new_addr;
+	u32 count = katom->nr_extres;
+	u32 i;
 
 	if (katom->jit_blocked) {
 		list_del(&katom->queue);
@@ -962,96 +1009,130 @@
 	}
 
 	info = katom->softjob_data;
-
 	if (WARN_ON(!info)) {
 		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
 		return 0;
 	}
 
-	/* The JIT ID is still in use so fail the allocation */
-	if (kctx->jit_alloc[info->id]) {
-		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
-		return 0;
+	for (i = 0; i < count; i++, info++) {
+		/* The JIT ID is still in use so fail the allocation */
+		if (kctx->jit_alloc[info->id]) {
+			katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+			return 0;
+		}
 	}
 
-	/* Create a JIT allocation */
-	reg = kbase_jit_allocate(kctx, info);
-	if (!reg) {
-		struct kbase_jd_atom *jit_atom;
-		bool can_block = false;
-
-		lockdep_assert_held(&kctx->jctx.lock);
-
-		jit_atom = list_first_entry(&kctx->jit_atoms_head,
-				struct kbase_jd_atom, jit_node);
-
-		list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) {
-			if (jit_atom == katom)
-				break;
-			if (jit_atom->core_req == BASE_JD_REQ_SOFT_JIT_FREE) {
-				u8 free_id = kbase_jit_free_get_id(jit_atom);
-
-				if (free_id && kctx->jit_alloc[free_id]) {
-					/* A JIT free which is active and
-					 * submitted before this atom
-					 */
-					can_block = true;
-					break;
-				}
-			}
-		}
-
-		if (!can_block) {
-			/* Mark the allocation so we know it's in use even if
-			 * the allocation itself fails.
+	for (i = 0, info = katom->softjob_data; i < count; i++, info++) {
+		if (kctx->jit_alloc[info->id]) {
+			/* The JIT ID is duplicated in this atom. Roll back
+			 * previous allocations and fail.
 			 */
-			kctx->jit_alloc[info->id] =
-				(struct kbase_va_region *) -1;
+			u32 j;
+
+			info = katom->softjob_data;
+			for (j = 0; j < i; j++, info++) {
+				kbase_jit_free(kctx, kctx->jit_alloc[info->id]);
+				kctx->jit_alloc[info->id] =
+						(struct kbase_va_region *) -1;
+			}
 
 			katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
 			return 0;
 		}
 
-		/* There are pending frees for an active allocation
-		 * so we should wait to see whether they free the memory.
-		 * Add to the beginning of the list to ensure that the atom is
-		 * processed only once in kbase_jit_free_finish
-		 */
-		list_add(&katom->queue, &kctx->jit_pending_alloc);
-		katom->jit_blocked = true;
+		/* Create a JIT allocation */
+		reg = kbase_jit_allocate(kctx, info);
+		if (!reg) {
+			struct kbase_jd_atom *jit_atom;
+			bool can_block = false;
 
-		return 1;
+			lockdep_assert_held(&kctx->jctx.lock);
+
+			jit_atom = list_first_entry(&kctx->jit_atoms_head,
+					struct kbase_jd_atom, jit_node);
+
+			list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) {
+				if (jit_atom == katom)
+					break;
+
+				if ((jit_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
+						BASE_JD_REQ_SOFT_JIT_FREE) {
+					u8 *free_ids = kbase_jit_free_get_ids(jit_atom);
+
+					if (free_ids && *free_ids &&
+						kctx->jit_alloc[*free_ids]) {
+						/* A JIT free which is active and
+						 * submitted before this atom
+						 */
+						can_block = true;
+						break;
+					}
+				}
+			}
+
+			if (!can_block) {
+				/* Mark the failed allocation as well as the
+				 * other un-attempted allocations in the set,
+				 * so we know they are in use even if the
+				 * allocation itself failed.
+				 */
+				for (; i < count; i++, info++) {
+					kctx->jit_alloc[info->id] =
+						(struct kbase_va_region *) -1;
+				}
+
+				katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+				return 0;
+			}
+
+			/* There are pending frees for an active allocation
+			 * so we should wait to see whether they free the
+			 * memory. Add to the list of atoms for which JIT
+			 * allocation is pending.
+			 */
+			kbase_jit_add_to_pending_alloc_list(katom);
+			katom->jit_blocked = true;
+
+			/* Rollback, the whole set will be re-attempted */
+			while (i-- > 0) {
+				info--;
+				kbase_jit_free(kctx, kctx->jit_alloc[info->id]);
+				kctx->jit_alloc[info->id] = NULL;
+			}
+
+			return 1;
+		}
+
+		/* Bind it to the user provided ID. */
+		kctx->jit_alloc[info->id] = reg;
 	}
 
-	/*
-	 * Write the address of the JIT allocation to the user provided
-	 * GPU allocation.
-	 */
-	ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
-			&mapping);
-	if (!ptr) {
+	for (i = 0, info = katom->softjob_data; i < count; i++, info++) {
 		/*
-		 * Leave the allocation "live" as the JIT free jit will be
-		 * submitted anyway.
+		 * Write the address of the JIT allocation to the user provided
+		 * GPU allocation.
 		 */
-		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
-		return 0;
-	}
+		ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+				&mapping);
+		if (!ptr) {
+			/*
+			 * Leave the allocations "live" as the JIT free atom
+			 * will be submitted anyway.
+			 */
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			return 0;
+		}
 
-	new_addr = reg->start_pfn << PAGE_SHIFT;
-	*ptr = new_addr;
-	KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(
-			katom, info->gpu_alloc_addr, new_addr);
-	kbase_vunmap(kctx, &mapping);
+		reg = kctx->jit_alloc[info->id];
+		new_addr = reg->start_pfn << PAGE_SHIFT;
+		*ptr = new_addr;
+		KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(
+				katom, info->gpu_alloc_addr, new_addr);
+		kbase_vunmap(kctx, &mapping);
+	}
 
 	katom->event_code = BASE_JD_EVENT_DONE;
 
-	/*
-	 * Bind it to the user provided ID. Do this last so we can check for
-	 * the JIT free racing this JIT alloc job.
-	 */
-	kctx->jit_alloc[info->id] = reg;
-
 	return 0;
 }
 
@@ -1061,6 +1142,9 @@
 
 	lockdep_assert_held(&katom->kctx->jctx.lock);
 
+	if (WARN_ON(!katom->softjob_data))
+		return;
+
 	/* Remove atom from jit_atoms_head list */
 	list_del(&katom->jit_node);
 
@@ -1077,34 +1161,76 @@
 static int kbase_jit_free_prepare(struct kbase_jd_atom *katom)
 {
 	struct kbase_context *kctx = katom->kctx;
+	__user void *data = (__user void *)(uintptr_t) katom->jc;
+	u8 *ids;
+	u32 count = MAX(katom->nr_extres, 1);
+	int ret;
+
+	/* Sanity checks */
+	if (count > ARRAY_SIZE(kctx->jit_alloc)) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL);
+	if (!ids) {
+		ret = -ENOMEM;
+		goto fail;
+	}
 
 	lockdep_assert_held(&kctx->jctx.lock);
+	katom->softjob_data = ids;
+
+	/* For backwards compatibility */
+	if (katom->nr_extres) {
+		/* Fail the job if there is no list of ids */
+		if (!data) {
+			ret = -EINVAL;
+			goto free_info;
+		}
+
+		if (copy_from_user(ids, data, sizeof(*ids)*count) != 0) {
+			ret = -EINVAL;
+			goto free_info;
+		}
+	} else {
+		katom->nr_extres = 1;
+		*ids = (u8)katom->jc;
+	}
+
 	list_add_tail(&katom->jit_node, &kctx->jit_atoms_head);
 
 	return 0;
+
+free_info:
+	kfree(katom->softjob_data);
+	katom->softjob_data = NULL;
+fail:
+	return ret;
 }
 
 static void kbase_jit_free_process(struct kbase_jd_atom *katom)
 {
 	struct kbase_context *kctx = katom->kctx;
-	u8 id = kbase_jit_free_get_id(katom);
+	u8 *ids = kbase_jit_free_get_ids(katom);
+	u32 count = katom->nr_extres;
+	u32 i;
 
-	/*
-	 * If the ID is zero or it is not in use yet then fail the job.
-	 */
-	if ((id == 0) || (kctx->jit_alloc[id] == NULL)) {
+	if (ids == NULL) {
 		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
 		return;
 	}
 
-	/*
-	 * If the ID is valid but the allocation request failed still succeed
-	 * this soft job but don't try and free the allocation.
-	 */
-	if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1)
-		kbase_jit_free(kctx, kctx->jit_alloc[id]);
-
-	kctx->jit_alloc[id] = NULL;
+	for (i = 0; i < count; i++, ids++) {
+		/*
+		 * If the ID is zero or it is not in use yet, then fail the job.
+		 */
+		if ((*ids == 0) || (kctx->jit_alloc[*ids] == NULL)) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			return;
+		}
+	}
 }
 
 static void kbasep_jit_free_finish_worker(struct work_struct *work)
@@ -1127,12 +1253,39 @@
 {
 	struct list_head *i, *tmp;
 	struct kbase_context *kctx = katom->kctx;
+	LIST_HEAD(jit_pending_alloc_list);
+	u8 *ids;
+	size_t j;
 
 	lockdep_assert_held(&kctx->jctx.lock);
+
+	ids = kbase_jit_free_get_ids(katom);
+	if (WARN_ON(ids == NULL))
+		return;
+
 	/* Remove this atom from the kctx->jit_atoms_head list */
 	list_del(&katom->jit_node);
 
-	list_for_each_safe(i, tmp, &kctx->jit_pending_alloc) {
+	for (j = 0; j != katom->nr_extres; ++j) {
+		if ((ids[j] != 0) && (kctx->jit_alloc[ids[j]] != NULL)) {
+			/*
+			 * If the ID is valid but the allocation request
+			 * failed, still succeed this soft job but don't
+			 * try to free the allocation.
+			 */
+			if (kctx->jit_alloc[ids[j]] != (struct kbase_va_region *) -1)
+				kbase_jit_free(kctx, kctx->jit_alloc[ids[j]]);
+
+			kctx->jit_alloc[ids[j]] = NULL;
+		}
+	}
+	/* Free the list of ids */
+	kfree(ids);
+
+	list_splice_tail_init(&kctx->jit_pending_alloc, &jit_pending_alloc_list);
+
+	list_for_each_safe(i, tmp, &jit_pending_alloc_list) {
 		struct kbase_jd_atom *pending_atom = list_entry(i,
 				struct kbase_jd_atom, queue);
 		if (kbase_jit_allocate_process(pending_atom) == 0) {
@@ -1271,9 +1424,14 @@
 
 int kbase_process_soft_job(struct kbase_jd_atom *katom)
 {
+	int ret = 0;
+
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(katom);
+
 	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
 	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
-		return kbase_dump_cpu_gpu_time(katom);
+		ret = kbase_dump_cpu_gpu_time(katom);
+		break;
 
 #if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
 	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
@@ -1283,7 +1441,7 @@
 		break;
 	case BASE_JD_REQ_SOFT_FENCE_WAIT:
 	{
-		int ret = kbase_sync_fence_in_wait(katom);
+		ret = kbase_sync_fence_in_wait(katom);
 
 		if (ret == 1) {
 #ifdef CONFIG_MALI_FENCE_DEBUG
@@ -1292,14 +1450,16 @@
 			kbasep_add_waiting_soft_job(katom);
 #endif
 		}
-		return ret;
+		break;
 	}
 #endif
 
 	case BASE_JD_REQ_SOFT_REPLAY:
-		return kbase_replay_process(katom);
+		ret = kbase_replay_process(katom);
+		break;
 	case BASE_JD_REQ_SOFT_EVENT_WAIT:
-		return kbasep_soft_event_wait(katom);
+		ret = kbasep_soft_event_wait(katom);
+		break;
 	case BASE_JD_REQ_SOFT_EVENT_SET:
 		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET);
 		break;
@@ -1315,7 +1475,8 @@
 		break;
 	}
 	case BASE_JD_REQ_SOFT_JIT_ALLOC:
-		return kbase_jit_allocate_process(katom);
+		ret = kbase_jit_allocate_process(katom);
+		break;
 	case BASE_JD_REQ_SOFT_JIT_FREE:
 		kbase_jit_free_process(katom);
 		break;
@@ -1328,7 +1489,8 @@
 	}
 
 	/* Atom is complete */
-	return 0;
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(katom);
+	return ret;
 }
 
 void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.h b/drivers/gpu/arm/midgard/mali_kbase_sync.h
index a7690b2..70557dd 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_sync.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -161,7 +161,11 @@
  */
 static inline void kbase_sync_fence_close_fd(int fd)
 {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)
+	ksys_close(fd);
+#else
 	sys_close(fd);
+#endif
 }
 
 /**
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_common.c b/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
index 9520f5ac..5239dae 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -28,6 +28,7 @@
 
 #include <linux/workqueue.h>
 #include "mali_kbase.h"
+#include "mali_kbase_sync.h"
 
 void kbase_sync_fence_wait_worker(struct work_struct *data)
 {
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_file.c b/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
index 8f8f3c8..bb94aee 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -73,10 +73,14 @@
 	if (!fence)
 		return -ENOMEM;
 
-	/* Take an extra reference to the fence on behalf of the katom.
-	 * This is needed because sync_file_create() will take ownership of
-	 * one of these refs */
+#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE)
+	/* Take an extra reference to the fence on behalf of the sync_file.
+	 * This is only needed on kernels before v4.9.68, where
+	 * sync_file_create() did not take its own reference; from v4.9.68
+	 * onwards sync_file_create() takes that reference itself.
+	 */
 	dma_fence_get(fence);
+#endif
 
 	/* create a sync_file fd representing the fence */
 	sync_file = sync_file_create(fence);
@@ -166,7 +170,9 @@
 	struct kbase_context *kctx = katom->kctx;
 
 	/* Cancel atom if fence is erroneous */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \
+	 (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
+	  KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE))
 	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error)
 #else
 	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0)
@@ -282,7 +288,9 @@
 	 * 1 : signaled
 	 */
 	if (dma_fence_is_signaled(fence)) {
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \
+	 (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
+	  KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE))
 		int status = fence->error;
 #else
 		int status = fence->status;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
index 926d6b6..2ff45f50 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -155,6 +155,8 @@
 	KBASE_TL_EVENT_LPU_SOFTSTOP,
 	KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
 	KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+	KBASE_TL_EVENT_ATOM_SOFTJOB_START,
+	KBASE_TL_EVENT_ATOM_SOFTJOB_END,
 
 	/* Job dump specific events. */
 	KBASE_JD_GPU_SOFT_RESET
@@ -500,6 +502,20 @@
 		"atom"
 	},
 	{
+		KBASE_TL_EVENT_ATOM_SOFTJOB_START,
+		__stringify(KBASE_TL_EVENT_ATOM_SOFTJOB_START),
+		"atom soft job has started",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTJOB_END,
+		__stringify(KBASE_TL_EVENT_ATOM_SOFTJOB_END),
+		"atom soft job has completed",
+		"@p",
+		"atom"
+	},
+	{
 		KBASE_JD_GPU_SOFT_RESET,
 		__stringify(KBASE_JD_GPU_SOFT_RESET),
 		"gpu soft reset",
@@ -1042,17 +1058,17 @@
 
 /**
  * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
- * @data:  unused
+ * @timer: unused
  *
  * Timer is executed periodically to check if any of the stream contains
  * buffer ready to be submitted to user space.
  */
-static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
+static void kbasep_tlstream_autoflush_timer_callback(struct timer_list *timer)
 {
 	enum tl_stream_type stype;
 	int                 rcode;
 
-	CSTD_UNUSED(data);
+	CSTD_UNUSED(timer);
 
 	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
 		struct tl_stream *stream = tl_stream[stype];
@@ -1376,9 +1392,8 @@
 
 	/* Initialize autoflush timer. */
 	atomic_set(&autoflush_timer_active, 0);
-	setup_timer(&autoflush_timer,
-			kbasep_tlstream_autoflush_timer_callback,
-			0);
+	kbase_timer_setup(&autoflush_timer,
+			  kbasep_tlstream_autoflush_timer_callback);
 
 	return 0;
 }
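
Timer callbacks are converted to the struct timer_list API, with kbase_timer_setup() hiding the kernel-version difference. One plausible shape for that helper is sketched below; its real definition is outside this hunk, so the cut-over version and the cast are assumptions.

/* Plausible compatibility wrapper (sketch, not from this patch). On
 * older kernels the callback still receives an unsigned long, so the
 * timer's own address is passed and the callback recovers its
 * container with container_of(), exactly as
 * kbasep_soft_job_timeout_worker() does above.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
#define kbase_timer_setup(timer, callback) \
	setup_timer(timer, (void (*)(unsigned long))(callback), \
		    (unsigned long)(timer))
#else
#define kbase_timer_setup(timer, callback) \
	timer_setup(timer, callback, 0)
#endif
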
@@ -2365,6 +2380,52 @@
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
+void __kbase_tlstream_tl_event_atom_softjob_start(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_START;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softjob_end(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_END;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
 void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
 {
 	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
index f436901..bfa25d9 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -147,6 +147,8 @@
 void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom);
 void __kbase_tlstream_tl_event_lpu_softstop(void *lpu);
 void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom);
+void __kbase_tlstream_tl_event_atom_softjob_start(void *atom);
+void __kbase_tlstream_tl_event_atom_softjob_end(void *atom);
 void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
 void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
 void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
@@ -515,27 +517,41 @@
 	__TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
 
 /**
- * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ex
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX
  * @atom:       atom identifier
  */
 #define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(atom) \
 	__TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom)
 
 /**
- * KBASE_TLSTREAM_TL_EVENT_LPU_softstop
+ * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP
  * @lpu:        name of the LPU object
  */
 #define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(lpu) \
 	__TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu)
 
 /**
- * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_issue
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE
  * @atom:       atom identifier
  */
 #define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(atom) \
 	__TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom)
 
 /**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softjob_start, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softjob_end, atom)
+
+/**
  * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - The GPU is being soft reset
  * @gpu:        name of the GPU object
  *
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
index 32fffe0..d7364d5 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -236,14 +236,8 @@
 	/* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
 	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
 	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_NEEDED),
 	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
 	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
 	KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
deleted file mode 100644
index ee6bdf8..0000000
--- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-
-#include <mali_kbase.h>
-#include <mali_kbase_jm.h>
-#include <mali_kbase_hwaccess_jm.h>
-
-#define CREATE_TRACE_POINTS
-
-#ifdef CONFIG_MALI_TRACE_TIMELINE
-#include "mali_timeline.h"
-
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active);
-
-struct kbase_trace_timeline_desc {
-	char *enum_str;
-	char *desc;
-	char *format;
-	char *format_desc;
-};
-
-static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = {
-	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc }
-	#include "mali_kbase_trace_timeline_defs.h"
-	#undef KBASE_TIMELINE_TRACE_CODE
-};
-
-#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table)
-
-static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos)
-{
-	if (*pos >= KBASE_NR_TRACE_CODES)
-		return NULL;
-
-	return &kbase_trace_timeline_desc_table[*pos];
-}
-
-static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data)
-{
-}
-
-static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos)
-{
-	(*pos)++;
-
-	if (*pos == KBASE_NR_TRACE_CODES)
-		return NULL;
-
-	return &kbase_trace_timeline_desc_table[*pos];
-}
-
-static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data)
-{
-	struct kbase_trace_timeline_desc *trace_desc = data;
-
-	seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc);
-	return 0;
-}
-
-
-static const struct seq_operations kbasep_trace_timeline_seq_ops = {
-	.start = kbasep_trace_timeline_seq_start,
-	.next = kbasep_trace_timeline_seq_next,
-	.stop = kbasep_trace_timeline_seq_stop,
-	.show = kbasep_trace_timeline_seq_show,
-};
-
-static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &kbasep_trace_timeline_seq_ops);
-}
-
-static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
-	.open = kbasep_trace_timeline_debugfs_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
-
-#ifdef CONFIG_DEBUG_FS
-
-void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
-{
-	debugfs_create_file("mali_timeline_defs",
-			S_IRUGO, kbdev->mali_debugfs_directory, NULL,
-			&kbasep_trace_timeline_debugfs_fops);
-}
-
-#endif /* CONFIG_DEBUG_FS */
-
-void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
-		struct kbase_jd_atom *katom, int js)
-{
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	if (kbdev->timeline.slot_atoms_submitted[js] > 0) {
-		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1);
-	} else {
-		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
-
-		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1);
-		KBASE_TIMELINE_JOB_START(kctx, js, atom_number);
-	}
-	++kbdev->timeline.slot_atoms_submitted[js];
-
-	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
-}
-
-void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
-		struct kbase_jd_atom *katom, int js,
-		kbasep_js_atom_done_code done_code)
-{
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) {
-		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0);
-	} else {
-		/* Job finished in JS_HEAD */
-		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
-
-		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0);
-		KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number);
-
-		/* see if we need to trace the job in JS_NEXT moving to JS_HEAD */
-		if (kbase_backend_nr_atoms_submitted(kbdev, js)) {
-			struct kbase_jd_atom *next_katom;
-			struct kbase_context *next_kctx;
-
-			/* Peek the next atom - note that the atom in JS_HEAD will already
-			 * have been dequeued */
-			next_katom = kbase_backend_inspect_head(kbdev, js);
-			WARN_ON(!next_katom);
-			next_kctx = next_katom->kctx;
-			KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0);
-			KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1);
-			KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom));
-		}
-	}
-
-	--kbdev->timeline.slot_atoms_submitted[js];
-
-	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
-}
-
-void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
-{
-	int uid = 0;
-	int old_uid;
-
-	/* If a producer already exists for the event, try to use their UID (multiple-producers) */
-	uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]);
-	old_uid = uid;
-
-	/* Get a new non-zero UID if we don't have one yet */
-	while (!uid)
-		uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter);
-
-	/* Try to use this UID */
-	if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid))
-		/* If it changed, raced with another producer: we've lost this UID */
-		uid = 0;
-
-	KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid);
-}
-
-void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
-{
-	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
-
-	if (uid != 0) {
-		if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
-			/* If it changed, raced with another consumer: we've lost this UID */
-			uid = 0;
-
-		KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
-	}
-}
-
-void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
-{
-	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
-
-	if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
-		/* If it changed, raced with another consumer: we've lost this UID */
-		uid = 0;
-
-	KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
-}
-
-void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
-{
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-	/* Simply log the start of the transition */
-	kbdev->timeline.l2_transitioning = true;
-	KBASE_TIMELINE_POWERING_L2(kbdev);
-}
-
-void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
-{
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-	/* Simply log the end of the transition */
-	if (kbdev->timeline.l2_transitioning) {
-		kbdev->timeline.l2_transitioning = false;
-		KBASE_TIMELINE_POWERED_L2(kbdev);
-	}
-}
-
-#endif /* CONFIG_MALI_TRACE_TIMELINE */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
deleted file mode 100644
index c1a3dfc..0000000
--- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
+++ /dev/null
@@ -1,368 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-
-#if !defined(_KBASE_TRACE_TIMELINE_H)
-#define _KBASE_TRACE_TIMELINE_H
-
-#ifdef CONFIG_MALI_TRACE_TIMELINE
-
-enum kbase_trace_timeline_code {
-	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val
-	#include "mali_kbase_trace_timeline_defs.h"
-	#undef KBASE_TIMELINE_TRACE_CODE
-};
-
-#ifdef CONFIG_DEBUG_FS
-
-/** Initialize Timeline DebugFS entries */
-void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
-
-#else /* CONFIG_DEBUG_FS */
-
-#define kbasep_trace_timeline_debugfs_init CSTD_NOP
-
-#endif /* CONFIG_DEBUG_FS */
-
-/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE
- * functions.
- * Output is timestamped by either sched_clock() (default), local_clock(), or
- * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */
-#include "mali_timeline.h"
-
-/* Trace number of atoms in flight for kctx (atoms either not completed, or in
-   process of being returned to user */
-#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count)                          \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec,   \
-				(int)kctx->timeline.owner_tgid,              \
-				count);                                      \
-	} while (0)
-
-/* Trace atom_id being Ready to Run */
-#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id)                             \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec,              \
-				CTX_FLOW_ATOM_READY,                         \
-				(int)kctx->timeline.owner_tgid,              \
-				atom_id);                                    \
-	} while (0)
-
-/* Trace number of atoms submitted to job slot js
- *
- * NOTE: This uses a different tracepoint to the head/next/soft-stop actions,
- * so that those actions can be filtered out separately from this
- *
- * This is because this is more useful, as we can use it to calculate general
- * utilization easily and accurately */
-#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count)                      \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec,   \
-				SW_SET_GPU_SLOT_ACTIVE,                      \
-				(int)kctx->timeline.owner_tgid,              \
-				js, count);                                  \
-	} while (0)
-
-
-/* Trace atoms present in JS_NEXT */
-#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count)                       \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
-				SW_SET_GPU_SLOT_NEXT,                        \
-				(int)kctx->timeline.owner_tgid,              \
-				js, count);                                  \
-	} while (0)
-
-/* Trace atoms present in JS_HEAD */
-#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count)                       \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
-				SW_SET_GPU_SLOT_HEAD,                        \
-				(int)kctx->timeline.owner_tgid,              \
-				js, count);                                  \
-	} while (0)
-
-/* Trace that a soft stop/evict from next is being attempted on a slot */
-#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
-				SW_SET_GPU_SLOT_STOPPING,                    \
-				(kctx) ? (int)kctx->timeline.owner_tgid : 0, \
-				js, count);                                  \
-	} while (0)
-
-
-
-/* Trace state of overall GPU power */
-#define KBASE_TIMELINE_GPU_POWER(kbdev, active)                              \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
-				SW_SET_GPU_POWER_ACTIVE, active);            \
-	} while (0)
-
-/* Trace state of tiler power */
-#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap)                            \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
-				SW_SET_GPU_POWER_TILER_ACTIVE,               \
-				hweight64(bitmap));                          \
-	} while (0)
-
-/* Trace number of shaders currently powered */
-#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap)                           \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
-				SW_SET_GPU_POWER_SHADER_ACTIVE,              \
-				hweight64(bitmap));                          \
-	} while (0)
-
-/* Trace state of L2 power */
-#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap)                               \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
-				SW_SET_GPU_POWER_L2_ACTIVE,                  \
-				hweight64(bitmap));                          \
-	} while (0)
-
-/* Trace state of L2 cache*/
-#define KBASE_TIMELINE_POWERING_L2(kbdev)                                    \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
-				SW_FLOW_GPU_POWER_L2_POWERING,               \
-				1);                                          \
-	} while (0)
-
-#define KBASE_TIMELINE_POWERED_L2(kbdev)                                     \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
-				SW_FLOW_GPU_POWER_L2_ACTIVE,                 \
-				1);                                          \
-	} while (0)
-
-/* Trace kbase_pm_send_event message send */
-#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id)         \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
-				SW_FLOW_PM_SEND_EVENT,                       \
-				event_type, pm_event_id);                    \
-	} while (0)
-
-/* Trace kbase_pm_worker message receive */
-#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id)       \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
-				SW_FLOW_PM_HANDLE_EVENT,                     \
-				event_type, pm_event_id);                    \
-	} while (0)
-
-
-/* Trace atom_id starting in JS_HEAD */
-#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number)          \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
-				HW_START_GPU_JOB_CHAIN_SW_APPROX,            \
-				(int)kctx->timeline.owner_tgid,              \
-				js, _consumerof_atom_number);                \
-	} while (0)
-
-/* Trace atom_id stopping on JS_HEAD */
-#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
-				HW_STOP_GPU_JOB_CHAIN_SW_APPROX,             \
-				(int)kctx->timeline.owner_tgid,              \
-				js, _producerof_atom_number_completed);      \
-	} while (0)
-
-/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a
- * certin caller */
-#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code)                      \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec,     \
-				trace_code, 1);                              \
-	} while (0)
-
-/* Trace number of contexts active */
-#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count)                          \
-	do {                                                                 \
-		struct timespec ts;                                          \
-		getrawmonotonic(&ts);                                        \
-		trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec,    \
-				count);                                      \
-	} while (0)
-
-/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
-
-/**
- * Trace that an atom is starting on a job slot
- *
- * The caller must be holding hwaccess_lock
- */
-void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
-		struct kbase_jd_atom *katom, int js);
-
-/**
- * Trace that an atom has done on a job slot
- *
- * 'Done' in this sense can occur either because:
- * - the atom in JS_HEAD finished
- * - the atom in JS_NEXT was evicted
- *
- * Whether the atom finished or was evicted is passed in @a done_code
- *
- * It is assumed that the atom has already been removed from the submit slot,
- * with either:
- * - kbasep_jm_dequeue_submit_slot()
- * - kbasep_jm_dequeue_tail_submit_slot()
- *
- * The caller must be holding hwaccess_lock
- */
-void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
-		struct kbase_jd_atom *katom, int js,
-		kbasep_js_atom_done_code done_code);
-
-
-/** Trace a pm event starting */
-void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
-		enum kbase_timeline_pm_event event_sent);
-
-/** Trace a pm event finishing */
-void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
-
-/** Check whether a pm event was present, and if so trace finishing it */
-void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
-
-/** Trace L2 power-up start */
-void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev);
-
-/** Trace L2 power-up done */
-void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
-
-#else
-
-#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP()
-
-#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP()
-
-#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP()
-
-#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP()
-
-#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP()
-
-#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP()
-
-#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP()
-
-#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP()
-
-#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP()
-
-#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP()
-
-#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP()
-
-#define KBASE_TIMELINE_POWERED_L2(kbdev)  CSTD_NOP()
-
-#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
-
-#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
-
-#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP()
-
-#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP()
-
-#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP()
-
-#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
-
-static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
-		struct kbase_jd_atom *katom, int js)
-{
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-}
-
-static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
-		struct kbase_jd_atom *katom, int js,
-		kbasep_js_atom_done_code done_code)
-{
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-}
-
-static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
-{
-}
-
-static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
-{
-}
-
-static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
-{
-}
-
-static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
-{
-}
-
-static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
-{
-}
-#endif				/* CONFIG_MALI_TRACE_TIMELINE */
-
-#endif				/* _KBASE_TRACE_TIMELINE_H */
-
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
deleted file mode 100644
index 114bcac..0000000
--- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-
-/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
- * *****            DO NOT INCLUDE DIRECTLY                  *****
- * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
-
-/*
- * Conventions on Event Names:
- *
- * - The prefix determines something about how the timeline should be
- *   displayed, and is split up into various parts, separated by underscores:
- *  - 'SW' and 'HW' as the first part will be used to determine whether a
- *     timeline is to do with Software or Hardware - effectively, separate
- *     'channels' for Software and Hardware
- *  - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and
- *    signify related pairs of events - these are optional.
- *  - 'FLOW' indicates a generic event, which can use dependencies
- * - This gives events such as:
- *  - 'SW_ENTER_FOO'
- *  - 'SW_LEAVE_FOO'
- *  - 'SW_FLOW_BAR_1'
- *  - 'SW_FLOW_BAR_2'
- *  - 'HW_START_BAZ'
- *  - 'HW_STOP_BAZ'
- * - And an unadorned HW event:
- *  - 'HW_BAZ_FROZBOZ'
- */
-
-/*
- * Conventions on parameter names:
- * - anything with 'instance' in the name will have a separate timeline based
- *   on that instances.
- * - underscored-prefixed parameters will by hidden by default on timelines
- *
- * Hence:
- * - Different job slots have their own 'instance', based on the instance value
- * - Per-context info (e.g. atoms on a context) have their own 'instance'
- *   (i.e. each context should be on a different timeline)
- *
- * Note that globally-shared resources can be tagged with a tgid, but we don't
- * want an instance per context:
- * - There's no point having separate Job Slot timelines for each context, that
- *   would be confusing - there's only really 3 job slots!
- * - There's no point having separate Shader-powered timelines for each
- *   context, that would be confusing - all shader cores (whether it be 4, 8,
- *   etc) are shared in the system.
- */
-
-	/*
-	 * CTX events
-	 */
-	/* Separate timelines for each context 'instance'*/
-	KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT,     "CTX: Atoms in flight",            "%d,%d",    "_instance_tgid,_value_number_of_atoms"),
-	KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY,            "CTX: Atoms Ready to Run",         "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"),
-
-	/*
-	 * SW Events
-	 */
-	/* Separate timelines for each slot 'instance' */
-	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE,         "SW: GPU slot active",             "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"),
-	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT,           "SW: GPU atom in NEXT",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"),
-	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD,           "SW: GPU atom in HEAD",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"),
-	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING,       "SW: Try Soft-Stop on GPU slot",   "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"),
-	/* Shader and overall power is shared - can't have separate instances of
-	 * it, just tagging with the context */
-	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE,        "SW: GPU power active",            "%d,%d",    "_tgid,_value_is_power_active"),
-	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE,  "SW: GPU tiler powered",           "%d,%d",    "_tgid,_value_number_of_tilers"),
-	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered",         "%d,%d",    "_tgid,_value_number_of_shaders"),
-	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE,     "SW: GPU L2 powered",              "%d,%d",    "_tgid,_value_number_of_l2"),
-
-	/* SW Power event messaging. _event_type is one from the kbase_pm_event enum  */
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT,          "SW: PM Send Event",               "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT,        "SW: PM Handle Event",             "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"),
-	/* SW L2 power events */
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING,  "SW: GPU L2 powering",             "%d,%d", "_tgid,_writerof_l2_transitioning"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE,	  "SW: GPU L2 powering done",        "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"),
-
-	KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE,          "SW: Context Active",              "%d,%d",    "_tgid,_value_active"),
-
-	/*
-	 * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock()
-	 */
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END,   "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END,   "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END,   "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"),
-
-	/*
-	 * Significant Indirect callers of kbase_pm_check_transitions_nolock()
-	 */
-	/* kbase_pm_request_cores */
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"),
-	/* kbase_pm_release_cores */
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"),
-	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END,   "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"),
-	/*
-	 * END: SW Functions that call kbase_pm_check_transitions_nolock()
-	 */
-
-	/*
-	 * HW Events
-	 */
-	KBASE_TIMELINE_TRACE_CODE(HW_MMU_FAULT,
-"HW: MMU Fault", "%d,%d,%d", "_tgid,fault_type,fault_stage,asid"),
-	KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX,
-"HW: Job Chain start (SW approximated)", "%d,%d,%d",
-"_tgid,job_slot,_consumerof_atom_number_ready"),
-	KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX,
-"HW: Job Chain stop (SW approximated)",  "%d,%d,%d",
-"_tgid,job_slot,_producerof_atom_number_completed")
diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.h b/drivers/gpu/arm/midgard/mali_kbase_utility.h
index d36285e..f2e5a33 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_utility.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2013, 2015, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -39,4 +39,28 @@
  */
 bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
 
+
+static inline void kbase_timer_setup(struct timer_list *timer,
+				     void (*callback)(struct timer_list *timer))
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
+	setup_timer(timer, (void (*)(unsigned long)) callback,
+			(unsigned long) timer);
+#else
+	timer_setup(timer, callback, 0);
+#endif
+}
+
+#ifndef WRITE_ONCE
+	#ifdef ASSIGN_ONCE
+		#define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x)
+	#else
+		#define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val))
+	#endif
+#endif
+
+#ifndef READ_ONCE
+	#define READ_ONCE(x) ACCESS_ONCE(x)
+#endif
+
 #endif				/* _KBASE_UTILITY_H */
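The kbase_timer_setup() helper added above bridges the 4.14 timer API change: on newer kernels the callback receives the struct timer_list pointer natively, while on older kernels the helper passes the timer pointer through the legacy unsigned long argument, so container_of() on the timer works either way. The WRITE_ONCE()/READ_ONCE() fallbacks likewise keep the driver building on kernels that only provide ASSIGN_ONCE()/ACCESS_ONCE(). A minimal usage sketch follows; the example_watchdog structure and functions are made up for illustration.

/* Sketch only - not part of this patch; names are hypothetical. */
struct example_watchdog {
	struct timer_list timer;
	bool expired;
};

static void example_watchdog_expire(struct timer_list *timer)
{
	struct example_watchdog *wdog =
		container_of(timer, struct example_watchdog, timer);

	/* Uses the WRITE_ONCE() fallback on pre-4.14 kernels. */
	WRITE_ONCE(wdog->expired, true);
}

static void example_watchdog_arm(struct example_watchdog *wdog)
{
	/* Same call on both old and new timer APIs. */
	kbase_timer_setup(&wdog->timer, example_watchdog_expire);
	mod_timer(&wdog->timer, jiffies + msecs_to_jiffies(100));
}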
diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
index 60308be..df936cfd 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -82,7 +82,9 @@
 
 /**
  * struct kbase_vinstr_context - vinstr context per device
- * @lock:              protects the entire vinstr context
+ * @lock:              protects the entire vinstr context, but the list of
+ *                     vinstr clients can be updated outside the lock using
+ *                     @state_lock.
  * @kbdev:             pointer to kbase device
  * @kctx:              pointer to kbase context
  * @vmap:              vinstr vmap for mapping hwcnt dump buffer
@@ -94,12 +96,14 @@
  * @reprogram:         when true, reprogram hwcnt block with the new set of
  *                     counters
  * @state:             vinstr state
- * @state_lock:        protects information about vinstr state
+ * @state_lock:        protects information about vinstr state and list of
+ *                     clients.
  * @suspend_waitq:     notification queue to trigger state re-validation
  * @suspend_cnt:       reference counter of vinstr's suspend state
  * @suspend_work:      worker to execute on entering suspended state
  * @resume_work:       worker to execute on leaving suspended state
- * @nclients:          number of attached clients, pending or otherwise
+ * @nclients:          number of attached clients, pending or idle
+ * @nclients_suspended: number of attached but suspended clients
  * @waiting_clients:   head of list of clients being periodically sampled
  * @idle_clients:      head of list of clients being idle
  * @suspended_clients: head of list of clients being suspended
@@ -109,13 +113,22 @@
  * @clients_present:   when true, we have at least one client
  *                     Note: this variable is in sync. with nclients and is
  *                     present to preserve simplicity. Protected by state_lock.
+ * @need_suspend:      when true, a suspend has been requested while a resume is
+ *                     in progress. Resume worker should queue a suspend.
+ * @need_resume:       when true, a resume has been requested while a suspend is
+ *                     in progress. Suspend worker should queue a resume.
+ * @forced_suspend:    when true, the suspend of vinstr needs to take place
+ *                     regardless of the kernel/user space clients attached
+ *                     to it. In particular, this flag is set when the suspend
+ *                     of vinstr is requested on entering protected mode or at
+ *                     the time of device suspend.
  */
 struct kbase_vinstr_context {
 	struct mutex             lock;
 	struct kbase_device      *kbdev;
 	struct kbase_context     *kctx;
 
-	struct kbase_vmap_struct vmap;
+	struct kbase_vmap_struct *vmap;
 	u64                      gpu_va;
 	void                     *cpu_va;
 	size_t                   dump_size;
@@ -130,6 +143,7 @@
 	struct work_struct       resume_work;
 
 	u32                      nclients;
+	u32                      nclients_suspended;
 	struct list_head         waiting_clients;
 	struct list_head         idle_clients;
 	struct list_head         suspended_clients;
@@ -139,6 +153,10 @@
 	atomic_t                 request_pending;
 
 	bool                     clients_present;
+
+	bool                     need_suspend;
+	bool                     need_resume;
+	bool                     forced_suspend;
 };
 
 /**
@@ -161,6 +179,7 @@
  * @write_idx:     index of buffer being written by dumping service
  * @waitq:         client's notification queue
  * @pending:       when true, client has attached but hwcnt not yet updated
+ * @suspended:     when true, client is suspended
  */
 struct kbase_vinstr_client {
 	struct kbase_vinstr_context        *vinstr_ctx;
@@ -181,6 +200,7 @@
 	atomic_t                           write_idx;
 	wait_queue_head_t                  waitq;
 	bool                               pending;
+	bool                               suspended;
 };
 
 /**
@@ -195,6 +215,9 @@
 
 /*****************************************************************************/
 
+static void kbase_vinstr_update_suspend(
+		struct kbase_vinstr_context *vinstr_ctx);
+
 static int kbasep_vinstr_service_task(void *data);
 
 static unsigned int kbasep_vinstr_hwcnt_reader_poll(
@@ -226,14 +249,14 @@
 {
 	struct kbase_context *kctx = vinstr_ctx->kctx;
 	struct kbase_device *kbdev = kctx->kbdev;
-	struct kbase_uk_hwcnt_setup setup;
+	struct kbase_ioctl_hwcnt_enable enable;
 	int err;
 
-	setup.dump_buffer = vinstr_ctx->gpu_va;
-	setup.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
-	setup.tiler_bm    = vinstr_ctx->bitmap[TILER_HWCNT_BM];
-	setup.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
-	setup.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
+	enable.dump_buffer = vinstr_ctx->gpu_va;
+	enable.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
+	enable.tiler_bm    = vinstr_ctx->bitmap[TILER_HWCNT_BM];
+	enable.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
+	enable.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
 
 	/* Mark the context as active so the GPU is kept turned on */
 	/* A suspend won't happen here, because we're in a syscall from a
@@ -242,7 +265,7 @@
 
 	/* Schedule the context in */
 	kbasep_js_schedule_privileged_ctx(kbdev, kctx);
-	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup);
+	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
 	if (err) {
 		/* Release the context. This had its own Power Manager Active
 		 * reference */
@@ -315,9 +338,15 @@
 #endif /* CONFIG_MALI_NO_MALI */
 	{
 		/* assume v5 for now */
+#ifdef CONFIG_MALI_NO_MALI
+		u32 nr_l2 = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+		u64 core_mask =
+			(1ULL << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+#else
 		base_gpu_props *props = &kbdev->gpu_props.props;
 		u32 nr_l2 = props->l2_props.num_l2_slices;
 		u64 core_mask = props->coherency_info.group[0].core_mask;
+#endif
 		u32 nr_blocks = fls64(core_mask);
 
 		/* JM and tiler counter blocks are always present */
@@ -342,7 +371,11 @@
 	struct kbase_context *kctx = vinstr_ctx->kctx;
 	u64 flags, nr_pages;
 
-	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR |
+		BASE_MEM_PERMANENT_KERNEL_MAPPING | BASE_MEM_CACHED_CPU;
+	if (kctx->kbdev->mmu_mode->flags &
+			KBASE_MMU_MODE_HAS_NON_CACHEABLE)
+		flags |= BASE_MEM_UNCACHED_GPU;
 	vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
 	nr_pages = PFN_UP(vinstr_ctx->dump_size);
 
@@ -351,11 +384,9 @@
 	if (!reg)
 		return -ENOMEM;
 
-	vinstr_ctx->cpu_va = kbase_vmap(
-			kctx,
-			vinstr_ctx->gpu_va,
-			vinstr_ctx->dump_size,
-			&vinstr_ctx->vmap);
+	vinstr_ctx->cpu_va = kbase_phy_alloc_mapping_get(kctx,
+			vinstr_ctx->gpu_va, &vinstr_ctx->vmap);
+
 	if (!vinstr_ctx->cpu_va) {
 		kbase_mem_free(kctx, vinstr_ctx->gpu_va);
 		return -ENOMEM;
@@ -369,7 +400,7 @@
 {
 	struct kbase_context *kctx = vinstr_ctx->kctx;
 
-	kbase_vunmap(kctx, &vinstr_ctx->vmap);
+	kbase_phy_alloc_mapping_put(kctx, vinstr_ctx->vmap);
 	kbase_mem_free(kctx, vinstr_ctx->gpu_va);
 }
 
@@ -472,18 +503,24 @@
 	struct kbasep_kctx_list_element *element;
 	struct kbasep_kctx_list_element *tmp;
 	bool                            found = false;
+	bool                            hwcnt_disabled = false;
 	unsigned long                   flags;
 
 	/* Release hw counters dumping resources. */
 	vinstr_ctx->thread = NULL;
-	disable_hwcnt(vinstr_ctx);
-	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
 
 	/* Simplify state transitions by specifying that we have no clients. */
 	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 	vinstr_ctx->clients_present = false;
+	if ((VINSTR_SUSPENDED == vinstr_ctx->state) || (VINSTR_RESUMING == vinstr_ctx->state))
+		hwcnt_disabled = true;
 	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 
+	if (!hwcnt_disabled)
+		disable_hwcnt(vinstr_ctx);
+
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+
 	/* Remove kernel context from the device's contexts list. */
 	mutex_lock(&kbdev->kctx_list_lock);
 	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
@@ -523,6 +560,8 @@
 {
 	struct task_struct         *thread = NULL;
 	struct kbase_vinstr_client *cli;
+	unsigned long flags;
+	bool clients_present = false;
 
 	KBASE_DEBUG_ASSERT(vinstr_ctx);
 
@@ -548,10 +587,14 @@
 	hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap);
 	vinstr_ctx->reprogram = true;
 
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
 	/* If this is the first client, create the vinstr kbase
 	 * context. This context is permanently resident until the
 	 * last client exits. */
-	if (!vinstr_ctx->nclients) {
+	if (!clients_present) {
 		hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap);
 		if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0)
 			goto error;
@@ -606,8 +649,11 @@
 	atomic_set(&cli->write_idx, 0);
 	init_waitqueue_head(&cli->waitq);
 
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 	vinstr_ctx->nclients++;
 	list_add(&cli->list, &vinstr_ctx->idle_clients);
+	kbase_vinstr_update_suspend(vinstr_ctx);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 
 	mutex_unlock(&vinstr_ctx->lock);
 
@@ -620,7 +666,7 @@
 				(unsigned long)cli->dump_buffers,
 				get_order(cli->dump_size * cli->buffer_count));
 	kfree(cli->accum_buffer);
-	if (!vinstr_ctx->nclients && vinstr_ctx->kctx) {
+	if (!clients_present && vinstr_ctx->kctx) {
 		thread = vinstr_ctx->thread;
 		kbasep_vinstr_destroy_kctx(vinstr_ctx);
 	}
@@ -642,18 +688,19 @@
 	struct task_struct          *thread = NULL;
 	u32 zerobitmap[4] = { 0 };
 	int cli_found = 0;
+	unsigned long flags;
+	bool clients_present;
 
 	KBASE_DEBUG_ASSERT(cli);
 	vinstr_ctx = cli->vinstr_ctx;
 	KBASE_DEBUG_ASSERT(vinstr_ctx);
 
 	mutex_lock(&vinstr_ctx->lock);
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 
 	list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) {
 		if (iter == cli) {
-			vinstr_ctx->reprogram = true;
 			cli_found = 1;
-			list_del(&iter->list);
 			break;
 		}
 	}
@@ -661,15 +708,47 @@
 		list_for_each_entry_safe(
 				iter, tmp, &vinstr_ctx->waiting_clients, list) {
 			if (iter == cli) {
-				vinstr_ctx->reprogram = true;
 				cli_found = 1;
-				list_del(&iter->list);
+				break;
+			}
+		}
+	}
+	if (!cli_found) {
+		list_for_each_entry_safe(
+				iter, tmp, &vinstr_ctx->suspended_clients, list) {
+			if (iter == cli) {
+				cli_found = 1;
 				break;
 			}
 		}
 	}
 	KBASE_DEBUG_ASSERT(cli_found);
 
+	if (cli_found) {
+		vinstr_ctx->reprogram = true;
+		list_del(&iter->list);
+	}
+
+	if (!cli->suspended)
+		vinstr_ctx->nclients--;
+	else
+		vinstr_ctx->nclients_suspended--;
+
+	kbase_vinstr_update_suspend(vinstr_ctx);
+
+	clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended);
+
+	/* Rebuild context bitmap now that the client has detached */
+	hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->suspended_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
 	kfree(cli->dump_buffers_meta);
 	free_pages(
 			(unsigned long)cli->dump_buffers,
@@ -677,19 +756,11 @@
 	kfree(cli->accum_buffer);
 	kfree(cli);
 
-	vinstr_ctx->nclients--;
-	if (!vinstr_ctx->nclients) {
+	if (!clients_present) {
 		thread = vinstr_ctx->thread;
 		kbasep_vinstr_destroy_kctx(vinstr_ctx);
 	}
 
-	/* Rebuild context bitmap now that the client has detached */
-	hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
-	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
-		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
-	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
-		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
-
 	mutex_unlock(&vinstr_ctx->lock);
 
 	/* Thread must be stopped after lock is released. */
@@ -977,9 +1048,18 @@
 		rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx);
 	WARN_ON(rcode);
 
+	if (!rcode) {
+		/* Invalidate the kernel buffer before reading from it.
+		 * As the vinstr_ctx->lock is already held by the caller, the
+		 * unmap of kernel buffer cannot take place simultaneously.
+		 */
+		lockdep_assert_held(&vinstr_ctx->lock);
+		kbase_sync_mem_regions(vinstr_ctx->kctx, vinstr_ctx->vmap,
+				KBASE_SYNC_TO_CPU);
+	}
+
 	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-	switch (vinstr_ctx->state)
-	{
+	switch (vinstr_ctx->state) {
 	case VINSTR_SUSPENDING:
 		schedule_work(&vinstr_ctx->suspend_work);
 		break;
@@ -990,12 +1070,13 @@
 	default:
 		break;
 	}
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 
 	/* Accumulate values of collected counters. */
 	if (!rcode)
 		accum_clients(vinstr_ctx);
 
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
 	return rcode;
 }
 
@@ -1100,6 +1181,7 @@
 
 		if (!reprogram_hwcnt(vinstr_ctx)) {
 			vinstr_ctx->reprogram = false;
+			spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 			list_for_each_entry(
 					iter,
 					&vinstr_ctx->idle_clients,
@@ -1110,6 +1192,7 @@
 					&vinstr_ctx->waiting_clients,
 					list)
 				iter->pending = false;
+			spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 		}
 	}
 }
@@ -1128,6 +1211,7 @@
 		enum base_hwcnt_reader_event event_id)
 {
 	int rcode = 0;
+	unsigned long flags;
 
 	/* Copy collected counters to user readable buffer. */
 	if (cli->buffer_count)
@@ -1138,18 +1222,23 @@
 	else
 		rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli);
 
+	/* Prepare for next request. */
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+	spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags);
+	/* Check if the client was suspended while it was being updated */
+	if (cli->suspended)
+		rcode = -EINVAL;
+	spin_unlock_irqrestore(&cli->vinstr_ctx->state_lock, flags);
+
 	if (rcode)
 		goto exit;
 
-
 	/* Notify client. Make sure all changes to memory are visible. */
 	wmb();
 	atomic_inc(&cli->write_idx);
 	wake_up_interruptible(&cli->waitq);
 
-	/* Prepare for next request. */
-	memset(cli->accum_buffer, 0, cli->dump_size);
-
 exit:
 	return rcode;
 }
@@ -1208,6 +1297,7 @@
 		struct kbase_vinstr_client *cli = NULL;
 		struct kbase_vinstr_client *tmp;
 		int                        rcode;
+		unsigned long              flags;
 
 		u64              timestamp = kbasep_vinstr_get_timestamp();
 		u64              dump_time = 0;
@@ -1220,6 +1310,7 @@
 		if (current == vinstr_ctx->thread) {
 			atomic_set(&vinstr_ctx->request_pending, 0);
 
+			spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 			if (!list_empty(&vinstr_ctx->waiting_clients)) {
 				cli = list_first_entry(
 						&vinstr_ctx->waiting_clients,
@@ -1227,6 +1318,7 @@
 						list);
 				dump_time = cli->dump_time;
 			}
+			spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 		}
 
 		if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) {
@@ -1255,6 +1347,7 @@
 
 		INIT_LIST_HEAD(&expired_requests);
 
+		spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 		/* Find all expired requests. */
 		list_for_each_entry_safe(
 				cli,
@@ -1273,18 +1366,29 @@
 		}
 
 		/* Fill data for each request found. */
-		list_for_each_entry_safe(cli, tmp, &expired_requests, list) {
+		while (!list_empty(&expired_requests)) {
+			cli = list_first_entry(&expired_requests,
+					struct kbase_vinstr_client, list);
+
 			/* Ensure that legacy buffer will not be used from
 			 * this kthread context. */
 			BUG_ON(0 == cli->buffer_count);
 			/* Expect only periodically sampled clients. */
 			BUG_ON(0 == cli->dump_interval);
 
+			/* Release the spinlock, as filling in the client's
+			 * userspace buffer could result in page faults. */
+			spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 			if (!rcode)
 				kbasep_vinstr_update_client(
 						cli,
 						timestamp,
 						BASE_HWCNT_READER_EVENT_PERIODIC);
+			spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags);
+
+			/* This client got suspended, move to the next one. */
+			if (cli->suspended)
+				continue;
 
 			/* Set new dumping time. Drop missed probing times. */
 			do {
@@ -1296,6 +1400,7 @@
 					cli,
 					&vinstr_ctx->waiting_clients);
 		}
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 
 		/* Reprogram counters set if required. */
 		kbasep_vinstr_reprogram(vinstr_ctx);
@@ -1410,10 +1515,18 @@
 		struct kbase_vinstr_client *cli, u32 interval)
 {
 	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	unsigned long flags;
 
 	KBASE_DEBUG_ASSERT(vinstr_ctx);
 
 	mutex_lock(&vinstr_ctx->lock);
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+
+	if (cli->suspended) {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		mutex_unlock(&vinstr_ctx->lock);
+		return -ENOMEM;
+	}
 
 	list_del(&cli->list);
 
@@ -1435,6 +1548,7 @@
 		list_add(&cli->list, &vinstr_ctx->idle_clients);
 	}
 
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 	mutex_unlock(&vinstr_ctx->lock);
 
 	return 0;
@@ -1739,17 +1853,29 @@
 	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 	vinstr_ctx->state = VINSTR_SUSPENDED;
 	wake_up_all(&vinstr_ctx->suspend_waitq);
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 
-	mutex_unlock(&vinstr_ctx->lock);
+	if (vinstr_ctx->need_resume) {
+		vinstr_ctx->need_resume = false;
+		vinstr_ctx->state = VINSTR_RESUMING;
+		schedule_work(&vinstr_ctx->resume_work);
 
-	/* Kick GPU scheduler to allow entering protected mode.
-	 * This must happen after vinstr was suspended. */
-	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	} else {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+
+		/* Kick GPU scheduler to allow entering protected mode.
+		 * This must happen after vinstr was suspended.
+		 */
+		kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+	}
 }
 
 /**
- * kbasep_vinstr_suspend_worker - worker resuming vinstr module
+ * kbasep_vinstr_resume_worker - worker resuming vinstr module
  * @data: pointer to work structure
  */
 static void kbasep_vinstr_resume_worker(struct work_struct *data)
@@ -1768,15 +1894,27 @@
 	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 	vinstr_ctx->state = VINSTR_IDLE;
 	wake_up_all(&vinstr_ctx->suspend_waitq);
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 
-	mutex_unlock(&vinstr_ctx->lock);
+	if (vinstr_ctx->need_suspend) {
+		vinstr_ctx->need_suspend = false;
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		schedule_work(&vinstr_ctx->suspend_work);
 
-	/* Kick GPU scheduler to allow entering protected mode.
-	 * Note that scheduler state machine might requested re-entry to
-	 * protected mode before vinstr was resumed.
-	 * This must happen after vinstr was release. */
-	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	} else {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+
+		/* Kick GPU scheduler to allow entering protected mode.
+		 * Note that the scheduler state machine might have requested
+		 * re-entry to protected mode before vinstr was resumed.
+		 * This must happen after vinstr was released.
+		 */
+		kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+	}
 }
 
 /*****************************************************************************/
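The reworked suspend and resume workers above use the new need_suspend/need_resume flags so that a request arriving while the opposite transition is still in flight is chained onto the running worker rather than dropped; the GPU scheduler is only kicked when no further transition is pending. The sketch below is a simplified model of that handshake with the locking and scheduler kick stripped out; it is illustrative only, not driver code.

/* Sketch only - simplified model of the deferred suspend/resume handshake.
 * The real workers do this under vinstr_ctx->state_lock and also call
 * kbasep_vinstr_kick_scheduler() when nothing further is pending. */
static void example_suspend_worker_tail(struct kbase_vinstr_context *ctx)
{
	ctx->state = VINSTR_SUSPENDED;
	if (ctx->need_resume) {
		/* A resume was requested while suspending: chain it. */
		ctx->need_resume = false;
		ctx->state = VINSTR_RESUMING;
		schedule_work(&ctx->resume_work);
	}
}

static void example_resume_worker_tail(struct kbase_vinstr_context *ctx)
{
	ctx->state = VINSTR_IDLE;
	if (ctx->need_suspend) {
		/* A suspend was requested while resuming: chain it. */
		ctx->need_suspend = false;
		ctx->state = VINSTR_SUSPENDING;
		schedule_work(&ctx->suspend_work);
	}
}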
@@ -1791,6 +1929,7 @@
 
 	INIT_LIST_HEAD(&vinstr_ctx->idle_clients);
 	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
+	INIT_LIST_HEAD(&vinstr_ctx->suspended_clients);
 	mutex_init(&vinstr_ctx->lock);
 	spin_lock_init(&vinstr_ctx->state_lock);
 	vinstr_ctx->kbdev = kbdev;
@@ -1824,27 +1963,35 @@
 
 		if (list_empty(list)) {
 			list = &vinstr_ctx->waiting_clients;
-			if (list_empty(list))
-				break;
+			if (list_empty(list)) {
+				list = &vinstr_ctx->suspended_clients;
+				if (list_empty(list))
+					break;
+			}
 		}
 
 		cli = list_first_entry(list, struct kbase_vinstr_client, list);
 		list_del(&cli->list);
+		if (!cli->suspended)
+			vinstr_ctx->nclients--;
+		else
+			vinstr_ctx->nclients_suspended--;
 		kfree(cli->accum_buffer);
 		kfree(cli);
-		vinstr_ctx->nclients--;
 	}
 	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients);
+	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients_suspended);
 	if (vinstr_ctx->kctx)
 		kbasep_vinstr_destroy_kctx(vinstr_ctx);
 	kfree(vinstr_ctx);
 }
 
 int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
-		struct kbase_uk_hwcnt_reader_setup *setup)
+		struct kbase_ioctl_hwcnt_reader_setup *setup)
 {
 	struct kbase_vinstr_client  *cli;
 	u32                         bitmap[4];
+	int                         fd;
 
 	KBASE_DEBUG_ASSERT(vinstr_ctx);
 	KBASE_DEBUG_ASSERT(setup);
@@ -1859,31 +2006,32 @@
 			vinstr_ctx,
 			setup->buffer_count,
 			bitmap,
-			&setup->fd,
+			&fd,
 			NULL);
 
 	if (!cli)
 		return -ENOMEM;
 
-	return 0;
+	kbase_vinstr_wait_for_ready(vinstr_ctx);
+	return fd;
 }
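Note the changed contract above: kbase_vinstr_hwcnt_reader_setup() no longer reports the reader file descriptor through setup->fd but returns it directly, so 0 is a valid (successful) result and only negative values are errors. A minimal sketch of a caller under that assumption (the wrapper name and plumbing are illustrative, not the driver's actual ioctl code):

/* Illustrative caller only; the wrapper name is made up. */
static int demo_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
		struct kbase_ioctl_hwcnt_reader_setup *setup)
{
	int fd = kbase_vinstr_hwcnt_reader_setup(vinstr_ctx, setup);

	if (fd < 0)
		return fd;	/* negative errno, e.g. -ENOMEM */

	/* Any non-negative value, including 0, is the reader fd and is
	 * handed back to user space as the ioctl result. */
	return fd;
}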
 
 int kbase_vinstr_legacy_hwc_setup(
 		struct kbase_vinstr_context *vinstr_ctx,
 		struct kbase_vinstr_client  **cli,
-		struct kbase_uk_hwcnt_setup *setup)
+		struct kbase_ioctl_hwcnt_enable *enable)
 {
 	KBASE_DEBUG_ASSERT(vinstr_ctx);
-	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(enable);
 	KBASE_DEBUG_ASSERT(cli);
 
-	if (setup->dump_buffer) {
+	if (enable->dump_buffer) {
 		u32 bitmap[4];
 
-		bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
-		bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
-		bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
-		bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+		bitmap[SHADER_HWCNT_BM] = enable->shader_bm;
+		bitmap[TILER_HWCNT_BM]  = enable->tiler_bm;
+		bitmap[MMU_L2_HWCNT_BM] = enable->mmu_l2_bm;
+		bitmap[JM_HWCNT_BM]     = enable->jm_bm;
 
 		if (*cli)
 			return -EBUSY;
@@ -1892,11 +2040,13 @@
 				vinstr_ctx,
 				0,
 				bitmap,
-				(void *)(long)setup->dump_buffer,
+				(void *)(uintptr_t)enable->dump_buffer,
 				NULL);
 
 		if (!(*cli))
 			return -ENOMEM;
+
+		kbase_vinstr_wait_for_ready(vinstr_ctx);
 	} else {
 		if (!*cli)
 			return -EINVAL;
@@ -1910,9 +2060,10 @@
 
 struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
 		struct kbase_vinstr_context *vinstr_ctx,
-		struct kbase_uk_hwcnt_reader_setup *setup,
+		struct kbase_ioctl_hwcnt_reader_setup *setup,
 		void *kernel_buffer)
 {
+	struct kbase_vinstr_client *kernel_client;
 	u32 bitmap[4];
 
 	if (!vinstr_ctx || !setup || !kernel_buffer)
@@ -1923,12 +2074,17 @@
 	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
 	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
 
-	return kbasep_vinstr_attach_client(
-			vinstr_ctx,
-			0,
-			bitmap,
-			NULL,
-			kernel_buffer);
+	kernel_client = kbasep_vinstr_attach_client(
+				vinstr_ctx,
+				0,
+				bitmap,
+				NULL,
+				kernel_buffer);
+
+	if (kernel_client)
+		kbase_vinstr_wait_for_ready(vinstr_ctx);
+
+	return kernel_client;
 }
 KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup);
 
@@ -2010,6 +2166,7 @@
 	KBASE_DEBUG_ASSERT(vinstr_ctx);
 
 	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->forced_suspend = true;
 	switch (vinstr_ctx->state) {
 	case VINSTR_SUSPENDED:
 		vinstr_ctx->suspend_cnt++;
@@ -2036,13 +2193,42 @@
 		vinstr_ctx->state = VINSTR_SUSPENDING;
 		break;
 
-	case VINSTR_SUSPENDING:
-		/* fall through */
 	case VINSTR_RESUMING:
+		vinstr_ctx->need_suspend = true;
+		break;
+
+	case VINSTR_SUSPENDING:
 		break;
 
 	default:
-		BUG();
+		KBASE_DEBUG_ASSERT(0);
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	return ret;
+}
+
+static int kbase_vinstr_is_ready(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	int ret = -EAGAIN;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+	case VINSTR_RESUMING:
+	case VINSTR_SUSPENDING:
+		break;
+
+	case VINSTR_IDLE:
+	case VINSTR_DUMPING:
+		ret = 0;
+		break;
+	default:
+		KBASE_DEBUG_ASSERT(0);
 		break;
 	}
 	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
@@ -2056,6 +2242,58 @@
 			(0 == kbase_vinstr_try_suspend(vinstr_ctx)));
 }
 
+void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx)
+{
+	wait_event(vinstr_ctx->suspend_waitq,
+			(0 == kbase_vinstr_is_ready(vinstr_ctx)));
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_wait_for_ready);
+
+/**
+ * kbase_vinstr_update_suspend - Update vinstr suspend/resume status depending
+ *                               on nclients
+ * @vinstr_ctx: vinstr context pointer
+ *
+ * This function should be called whenever vinstr_ctx->nclients changes. This
+ * may cause vinstr to be suspended or resumed, depending on the number of
+ * clients and whether IPA is suspended or not.
+ */
+static void kbase_vinstr_update_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	lockdep_assert_held(&vinstr_ctx->state_lock);
+
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+		if ((vinstr_ctx->nclients) && (0 == vinstr_ctx->suspend_cnt)) {
+			vinstr_ctx->state = VINSTR_RESUMING;
+			schedule_work(&vinstr_ctx->resume_work);
+		}
+		break;
+
+	case VINSTR_SUSPENDING:
+		if ((vinstr_ctx->nclients) && (!vinstr_ctx->forced_suspend))
+			vinstr_ctx->need_resume = true;
+		break;
+
+	case VINSTR_IDLE:
+		if (!vinstr_ctx->nclients) {
+			vinstr_ctx->state = VINSTR_SUSPENDING;
+			schedule_work(&vinstr_ctx->suspend_work);
+		}
+		break;
+
+	case VINSTR_DUMPING:
+		if (!vinstr_ctx->nclients)
+			vinstr_ctx->state = VINSTR_SUSPENDING;
+		break;
+
+	case VINSTR_RESUMING:
+		if (!vinstr_ctx->nclients)
+			vinstr_ctx->need_suspend = true;
+		break;
+	}
+}
+
 void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx)
 {
 	unsigned long flags;
@@ -2068,6 +2306,7 @@
 		BUG_ON(0 == vinstr_ctx->suspend_cnt);
 		vinstr_ctx->suspend_cnt--;
 		if (0 == vinstr_ctx->suspend_cnt) {
+			vinstr_ctx->forced_suspend = false;
 			if (vinstr_ctx->clients_present) {
 				vinstr_ctx->state = VINSTR_RESUMING;
 				schedule_work(&vinstr_ctx->resume_work);
@@ -2078,3 +2317,45 @@
 	}
 	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 }
+
+void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client)
+{
+	struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+
+	if (!client->suspended) {
+		list_del(&client->list);
+		list_add(&client->list, &vinstr_ctx->suspended_clients);
+
+		vinstr_ctx->nclients--;
+		vinstr_ctx->nclients_suspended++;
+		kbase_vinstr_update_suspend(vinstr_ctx);
+
+		client->suspended = true;
+	}
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
+
+void kbase_vinstr_resume_client(struct kbase_vinstr_client *client)
+{
+	struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+
+	if (client->suspended) {
+		list_del(&client->list);
+		list_add(&client->list, &vinstr_ctx->idle_clients);
+
+		vinstr_ctx->nclients++;
+		vinstr_ctx->nclients_suspended--;
+		kbase_vinstr_update_suspend(vinstr_ctx);
+
+		client->suspended = false;
+	}
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
index af7c7b6..d32799f 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,35 +24,13 @@
 #define _KBASE_VINSTR_H_
 
 #include <mali_kbase_hwcnt_reader.h>
+#include <mali_kbase_ioctl.h>
 
 /*****************************************************************************/
 
 struct kbase_vinstr_context;
 struct kbase_vinstr_client;
 
-struct kbase_uk_hwcnt_setup {
-	/* IN */
-	u64 dump_buffer;
-	u32 jm_bm;
-	u32 shader_bm;
-	u32 tiler_bm;
-	u32 unused_1; /* keep for backwards compatibility */
-	u32 mmu_l2_bm;
-	u32 padding;
-	/* OUT */
-};
-
-struct kbase_uk_hwcnt_reader_setup {
-	/* IN */
-	u32 buffer_count;
-	u32 jm_bm;
-	u32 shader_bm;
-	u32 tiler_bm;
-	u32 mmu_l2_bm;
-
-	/* OUT */
-	s32 fd;
-};
 /*****************************************************************************/
 
 /**
@@ -74,24 +52,24 @@
  * @vinstr_ctx: vinstr context
  * @setup:      reader's configuration
  *
- * Return: zero on success
+ * Return: file descriptor on success, or a negative error code otherwise
  */
 int kbase_vinstr_hwcnt_reader_setup(
 		struct kbase_vinstr_context        *vinstr_ctx,
-		struct kbase_uk_hwcnt_reader_setup *setup);
+		struct kbase_ioctl_hwcnt_reader_setup *setup);
 
 /**
  * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping
  * @vinstr_ctx: vinstr context
  * @cli:        pointer where to store pointer to new vinstr client structure
- * @setup:      hwc configuration
+ * @enable:     hwc configuration
  *
  * Return: zero on success
  */
 int kbase_vinstr_legacy_hwc_setup(
 		struct kbase_vinstr_context *vinstr_ctx,
 		struct kbase_vinstr_client  **cli,
-		struct kbase_uk_hwcnt_setup *setup);
+		struct kbase_ioctl_hwcnt_enable *enable);
 
 /**
  * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side
@@ -100,13 +78,13 @@
  * @setup:         reader's configuration
  * @kernel_buffer: pointer to dump buffer
  *
- * setup->buffer_count and setup->fd are not used for kernel side clients.
+ * setup->buffer_count is not used for kernel side clients.
  *
  * Return: pointer to client structure, or NULL on failure
  */
 struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
 		struct kbase_vinstr_context *vinstr_ctx,
-		struct kbase_uk_hwcnt_reader_setup *setup,
+		struct kbase_ioctl_hwcnt_reader_setup *setup,
 		void *kernel_buffer);
 
 /**
@@ -156,6 +134,16 @@
 void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx);
 
 /**
+ * kbase_vinstr_wait_for_ready - waits for the vinstr context to get ready
+ * @vinstr_ctx: vinstr context
+ *
+ * Waits for vinstr to become ready for dumping. Vinstr may still be in the
+ * resuming state right after a client is attached, but clients expect it to
+ * be ready for dumping immediately after attaching.
+ */
+void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
  * kbase_vinstr_resume - resumes operation of a given vinstr context
  * @vinstr_ctx: vinstr context
  *
@@ -178,5 +166,17 @@
  */
 void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
 
+/**
+ * kbase_vinstr_suspend_client - Suspend vinstr client
+ * @client: pointer to vinstr client
+ */
+void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client);
+
+/**
+ * kbase_vinstr_resume_client - Resume vinstr client
+ * @client: pointer to vinstr client
+ */
+void kbase_vinstr_resume_client(struct kbase_vinstr_client *client);
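The two per-client helpers above are meant to be used as a matched pair: suspending a client parks it on the suspended_clients list and drops nclients (possibly suspending vinstr altogether if it was the last active client), and resuming does the inverse. A minimal sketch of the intended pairing, with a hypothetical caller and reason for the pause:

/* Illustrative only; "cli" is any previously attached vinstr client. */
static void demo_pause_client(struct kbase_vinstr_client *cli)
{
	kbase_vinstr_suspend_client(cli);

	/* ... window in which this client must not take part in dumps ... */

	kbase_vinstr_resume_client(cli);
}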
+
 #endif /* _KBASE_VINSTR_H_ */
 
diff --git a/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
index da2ffaf..920562e 100644
--- a/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
+++ b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -176,11 +176,10 @@
 DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
 DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
 DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
-DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED);
 DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
-DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED);
-DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE);
-DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_TILER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_TILER_NEEDED);
 DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
 DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
 DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h
index 5e83ee8..8d9f7b6 100644
--- a/drivers/gpu/arm/midgard/mali_midg_regmap.h
+++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -34,8 +34,7 @@
 #define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
 #define GPU_ID                  0x000	/* (RO) GPU and revision identifier */
 #define L2_FEATURES             0x004	/* (RO) Level 2 cache features */
-#define SUSPEND_SIZE            0x008   /* (RO) Fixed-function suspend buffer
-						size */
+#define CORE_FEATURES           0x008	/* (RO) Shader Core Features */
 #define TILER_FEATURES          0x00C	/* (RO) Tiler Features */
 #define MEM_FEATURES            0x010	/* (RO) Memory system features */
 #define MMU_FEATURES            0x014	/* (RO) MMU features */
@@ -46,6 +45,7 @@
 #define GPU_IRQ_MASK            0x028	/* (RW) */
 #define GPU_IRQ_STATUS          0x02C	/* (RO) */
 
+
 /* IRQ flags */
 #define GPU_FAULT               (1 << 0)	/* A GPU Fault has occurred */
 #define MULTIPLE_GPU_FAULTS     (1 << 7)	/* More than one GPU Fault occurred. */
@@ -93,6 +93,9 @@
 #define THREAD_MAX_WORKGROUP_SIZE 0x0A4	/* (RO) Maximum workgroup size */
 #define THREAD_MAX_BARRIER_SIZE 0x0A8	/* (RO) Maximum threads waiting at a barrier */
 #define THREAD_FEATURES         0x0AC	/* (RO) Thread features */
+#define THREAD_TLS_ALLOC        0x310   /* (RO) Number of threads per core that
+					 * TLS must be allocated for
+					 */
 
 #define TEXTURE_FEATURES_0      0x0B0	/* (RO) Support flags for indexed texture formats 0..31 */
 #define TEXTURE_FEATURES_1      0x0B4	/* (RO) Support flags for indexed texture formats 32..63 */
@@ -213,6 +216,9 @@
 #define JOB_IRQ_JS_STATE        0x010	/* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
 #define JOB_IRQ_THROTTLE        0x014	/* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt.  */
 
+/* JOB IRQ flags */
+#define JOB_IRQ_GLOBAL_IF       (1 << 18)   /* Global interface interrupt received */
+
 #define JOB_SLOT0               0x800	/* Configuration registers for job slot 0 */
 #define JOB_SLOT1               0x880	/* Configuration registers for job slot 1 */
 #define JOB_SLOT2               0x900	/* Configuration registers for job slot 2 */
@@ -495,7 +501,7 @@
 #define PRFCNT_CONFIG_MODE_MANUAL 1	/* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
 #define PRFCNT_CONFIG_MODE_TILE   2	/* The performance counters are enabled, and are written out each time a tile finishes rendering. */
 
-/* AS<n>_MEMATTR values: */
+/* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */
 /* Use GPU implementation-defined caching policy. */
 #define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
 /* The attribute set to force all resources to be cached. */
@@ -507,6 +513,12 @@
 #define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
 /* Set to write back memory, outer caching */
 #define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull
+/* Set to inner non-cacheable, outer non-cacheable.
+ * Setting defined by the alloc bits is ignored, but set to a valid encoding:
+ * - no-alloc on read
+ * - no-alloc on write
+ */
+#define AS_MEMATTR_AARCH64_NON_CACHEABLE  0x4Cull
 
 /* Use GPU implementation-defined  caching policy. */
 #define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
@@ -518,6 +530,11 @@
 #define AS_MEMATTR_LPAE_OUTER_IMPL_DEF        0x88ull
 /* Set to write back memory, outer caching */
 #define AS_MEMATTR_LPAE_OUTER_WA              0x8Dull
+/* There is no LPAE support for non-cacheable, since the memory type is always
+ * write-back.
+ * Marking this setting as reserved for LPAE.
+ */
+#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED
 
 /* Symbols for default MEMATTR to use
  * Default is - HW implementation defined caching */
@@ -534,6 +551,8 @@
 #define AS_MEMATTR_INDEX_OUTER_IMPL_DEF        3
 /* Outer coherent, write alloc inner */
 #define AS_MEMATTR_INDEX_OUTER_WA              4
+/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */
+#define AS_MEMATTR_INDEX_NON_CACHEABLE         5
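For context on how the new index is consumed: the assumption here is that AS_MEMATTR is packed like the AArch64 MAIR registers, one 8-bit attribute per index, so index 5 occupies bits 47:40. Under that assumption a setup path would install the new encoding roughly as follows (the helper is illustrative, not the driver's MMU configuration code):

/* Illustrative only: assumes one attribute byte per MEMATTR index. */
static u64 demo_memattr_with_non_cacheable(u64 memattr)
{
	memattr &= ~(0xffull << (8 * AS_MEMATTR_INDEX_NON_CACHEABLE));
	memattr |= AS_MEMATTR_AARCH64_NON_CACHEABLE
			<< (8 * AS_MEMATTR_INDEX_NON_CACHEABLE);
	return memattr;
}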
 
 /* JS<n>_FEATURES register */
 
diff --git a/drivers/gpu/arm/midgard/mali_timeline.h b/drivers/gpu/arm/midgard/mali_timeline.h
deleted file mode 100644
index d0deeadf..0000000
--- a/drivers/gpu/arm/midgard/mali_timeline.h
+++ /dev/null
@@ -1,401 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM mali_timeline
-
-#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _MALI_TIMELINE_H
-
-#include <linux/tracepoint.h>
-
-TRACE_EVENT(mali_timeline_atoms_in_flight,
-
-	TP_PROTO(u64 ts_sec,
-		u32 ts_nsec,
-		int tgid,
-		int count),
-
-	TP_ARGS(ts_sec,
-		ts_nsec,
-		tgid,
-		count),
-
-	TP_STRUCT__entry(
-			__field(u64, ts_sec)
-			__field(u32, ts_nsec)
-			__field(int, tgid)
-			__field(int, count)
-	),
-
-	TP_fast_assign(
-		__entry->ts_sec = ts_sec;
-		__entry->ts_nsec = ts_nsec;
-		__entry->tgid = tgid;
-		__entry->count = count;
-	),
-
-	TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT,
-				(int)__entry->ts_sec,
-				(int)__entry->ts_nsec,
-				__entry->tgid,
-				__entry->count)
-);
-
-
-TRACE_EVENT(mali_timeline_atom,
-
-	TP_PROTO(u64 ts_sec,
-		u32 ts_nsec,
-		int event_type,
-		int tgid,
-		int atom_id),
-
-	TP_ARGS(ts_sec,
-		ts_nsec,
-		event_type,
-		tgid,
-		atom_id),
-
-	TP_STRUCT__entry(
-			__field(u64, ts_sec)
-			__field(u32, ts_nsec)
-			__field(int, event_type)
-			__field(int, tgid)
-			__field(int, atom_id)
-	),
-
-	TP_fast_assign(
-		__entry->ts_sec = ts_sec;
-		__entry->ts_nsec = ts_nsec;
-		__entry->event_type = event_type;
-		__entry->tgid = tgid;
-		__entry->atom_id = atom_id;
-	),
-
-	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
-			(int)__entry->ts_sec,
-			(int)__entry->ts_nsec,
-			__entry->tgid,
-			__entry->atom_id,
-			__entry->atom_id)
-);
-
-TRACE_EVENT(mali_timeline_gpu_slot_active,
-
-	TP_PROTO(u64 ts_sec,
-		u32 ts_nsec,
-		int event_type,
-		int tgid,
-		int js,
-		int count),
-
-	TP_ARGS(ts_sec,
-		ts_nsec,
-		event_type,
-		tgid,
-		js,
-		count),
-
-	TP_STRUCT__entry(
-			__field(u64, ts_sec)
-			__field(u32, ts_nsec)
-			__field(int, event_type)
-			__field(int, tgid)
-			__field(int, js)
-			__field(int, count)
-	),
-
-	TP_fast_assign(
-		__entry->ts_sec = ts_sec;
-		__entry->ts_nsec = ts_nsec;
-		__entry->event_type = event_type;
-		__entry->tgid = tgid;
-		__entry->js = js;
-		__entry->count = count;
-	),
-
-	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
-			(int)__entry->ts_sec,
-			(int)__entry->ts_nsec,
-			__entry->tgid,
-			__entry->js,
-			__entry->count)
-);
-
-TRACE_EVENT(mali_timeline_gpu_slot_action,
-
-	TP_PROTO(u64 ts_sec,
-		u32 ts_nsec,
-		int event_type,
-		int tgid,
-		int js,
-		int count),
-
-	TP_ARGS(ts_sec,
-		ts_nsec,
-		event_type,
-		tgid,
-		js,
-		count),
-
-	TP_STRUCT__entry(
-			__field(u64, ts_sec)
-			__field(u32, ts_nsec)
-			__field(int, event_type)
-			__field(int, tgid)
-			__field(int, js)
-			__field(int, count)
-	),
-
-	TP_fast_assign(
-		__entry->ts_sec = ts_sec;
-		__entry->ts_nsec = ts_nsec;
-		__entry->event_type = event_type;
-		__entry->tgid = tgid;
-		__entry->js = js;
-		__entry->count = count;
-	),
-
-	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
-			(int)__entry->ts_sec,
-			(int)__entry->ts_nsec,
-			__entry->tgid,
-			__entry->js,
-			__entry->count)
-);
-
-TRACE_EVENT(mali_timeline_gpu_power_active,
-
-	TP_PROTO(u64 ts_sec,
-		u32 ts_nsec,
-		int event_type,
-		int active),
-
-	TP_ARGS(ts_sec,
-		ts_nsec,
-		event_type,
-		active),
-
-	TP_STRUCT__entry(
-			__field(u64, ts_sec)
-			__field(u32, ts_nsec)
-			__field(int, event_type)
-			__field(int, active)
-	),
-
-	TP_fast_assign(
-		__entry->ts_sec = ts_sec;
-		__entry->ts_nsec = ts_nsec;
-		__entry->event_type = event_type;
-		__entry->active = active;
-	),
-
-	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
-			(int)__entry->ts_sec,
-			(int)__entry->ts_nsec,
-			__entry->active)
-
-);
-
-TRACE_EVENT(mali_timeline_l2_power_active,
-
-	TP_PROTO(u64 ts_sec,
-		u32 ts_nsec,
-		int event_type,
-		int state),
-
-	TP_ARGS(ts_sec,
-		ts_nsec,
-		event_type,
-		state),
-
-	TP_STRUCT__entry(
-			__field(u64, ts_sec)
-			__field(u32, ts_nsec)
-			__field(int, event_type)
-			__field(int, state)
-	),
-
-	TP_fast_assign(
-		__entry->ts_sec = ts_sec;
-		__entry->ts_nsec = ts_nsec;
-		__entry->event_type = event_type;
-		__entry->state = state;
-	),
-
-	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
-			(int)__entry->ts_sec,
-			(int)__entry->ts_nsec,
-			__entry->state)
-
-);
-TRACE_EVENT(mali_timeline_pm_event,
-
-	TP_PROTO(u64 ts_sec,
-		u32 ts_nsec,
-		int event_type,
-		int pm_event_type,
-		unsigned int pm_event_id),
-
-	TP_ARGS(ts_sec,
-		ts_nsec,
-		event_type,
-		pm_event_type,
-		pm_event_id),
-
-	TP_STRUCT__entry(
-			__field(u64, ts_sec)
-			__field(u32, ts_nsec)
-			__field(int, event_type)
-			__field(int, pm_event_type)
-			__field(unsigned int, pm_event_id)
-	),
-
-	TP_fast_assign(
-		__entry->ts_sec = ts_sec;
-		__entry->ts_nsec = ts_nsec;
-		__entry->event_type = event_type;
-		__entry->pm_event_type = pm_event_type;
-		__entry->pm_event_id = pm_event_id;
-	),
-
-	TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type,
-			(int)__entry->ts_sec,
-			(int)__entry->ts_nsec,
-			__entry->pm_event_type, __entry->pm_event_id)
-
-);
-
-TRACE_EVENT(mali_timeline_slot_atom,
-
-	TP_PROTO(u64 ts_sec,
-		u32 ts_nsec,
-		int event_type,
-		int tgid,
-		int js,
-		int atom_id),
-
-	TP_ARGS(ts_sec,
-		ts_nsec,
-		event_type,
-		tgid,
-		js,
-		atom_id),
-
-	TP_STRUCT__entry(
-			__field(u64, ts_sec)
-			__field(u32, ts_nsec)
-			__field(int, event_type)
-			__field(int, tgid)
-			__field(int, js)
-			__field(int, atom_id)
-	),
-
-	TP_fast_assign(
-		__entry->ts_sec = ts_sec;
-		__entry->ts_nsec = ts_nsec;
-		__entry->event_type = event_type;
-		__entry->tgid = tgid;
-		__entry->js = js;
-		__entry->atom_id = atom_id;
-	),
-
-	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
-			(int)__entry->ts_sec,
-			(int)__entry->ts_nsec,
-			__entry->tgid,
-			__entry->js,
-			__entry->atom_id)
-);
-
-TRACE_EVENT(mali_timeline_pm_checktrans,
-
-	TP_PROTO(u64 ts_sec,
-		u32 ts_nsec,
-		int trans_code,
-		int trans_id),
-
-	TP_ARGS(ts_sec,
-		ts_nsec,
-		trans_code,
-		trans_id),
-
-	TP_STRUCT__entry(
-			__field(u64, ts_sec)
-			__field(u32, ts_nsec)
-			__field(int, trans_code)
-			__field(int, trans_id)
-	),
-
-	TP_fast_assign(
-		__entry->ts_sec = ts_sec;
-		__entry->ts_nsec = ts_nsec;
-		__entry->trans_code = trans_code;
-		__entry->trans_id = trans_id;
-	),
-
-	TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code,
-			(int)__entry->ts_sec,
-			(int)__entry->ts_nsec,
-			__entry->trans_id)
-
-);
-
-TRACE_EVENT(mali_timeline_context_active,
-
-	TP_PROTO(u64 ts_sec,
-		u32 ts_nsec,
-		int count),
-
-	TP_ARGS(ts_sec,
-		ts_nsec,
-		count),
-
-	TP_STRUCT__entry(
-			__field(u64, ts_sec)
-			__field(u32, ts_nsec)
-			__field(int, count)
-	),
-
-	TP_fast_assign(
-		__entry->ts_sec = ts_sec;
-		__entry->ts_nsec = ts_nsec;
-		__entry->count = count;
-	),
-
-	TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE,
-			(int)__entry->ts_sec,
-			(int)__entry->ts_nsec,
-			__entry->count)
-);
-
-#endif /* _MALI_TIMELINE_H */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
-
diff --git a/drivers/gpu/arm/midgard/mali_uk.h b/drivers/gpu/arm/midgard/mali_uk.h
index 961a4a5..c81f404 100644
--- a/drivers/gpu/arm/midgard/mali_uk.h
+++ b/drivers/gpu/arm/midgard/mali_uk.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010, 2012-2015, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -44,7 +44,7 @@
  * @defgroup uk_api User-Kernel Interface API
  *
  * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
- * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver.
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver.
  *
  * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
  * kernel-side API (UKK) via an OS-specific communication mechanism.
diff --git a/drivers/gpu/arm/midgard/platform/rk/mali_kbase_rk.c b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_rk.c
index 322ed6b..099ccc2 100644
--- a/drivers/gpu/arm/midgard/platform/rk/mali_kbase_rk.c
+++ b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_rk.c
@@ -101,7 +101,6 @@
 	}
 
 	kbase_rk->is_powered = true;
-	KBASE_TIMELINE_GPU_POWER(kbdev, 1);
 
 	return ret;
 }
@@ -119,7 +118,6 @@
 	dev_dbg(kbdev->dev, "%s: powering off\n", __func__);
 
 	kbase_rk->is_powered = false;
-	KBASE_TIMELINE_GPU_POWER(kbdev, 0);
 
 	clk_disable(kbase_rk->clk);
 
diff --git a/drivers/gpu/arm/midgard/sconscript b/drivers/gpu/arm/midgard/sconscript
index eae28f4..01c7589 100644
--- a/drivers/gpu/arm/midgard/sconscript
+++ b/drivers/gpu/arm/midgard/sconscript
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -47,22 +47,18 @@
 
 make_args = env.kernel_get_config_defines(ret_list = True) + [
 	'PLATFORM=%s' % env['platform'],
-	'MALI_ERROR_INJECT_ON=%s' % env['error_inject'],
 	'MALI_KERNEL_TEST_API=%s' % env['debug'],
 	'MALI_UNIT_TEST=%s' % env['unit'],
 	'MALI_RELEASE_NAME=%s' % env['mali_release_name'],
 	'MALI_MOCK_TEST=%s' % mock_test,
 	'MALI_CUSTOMER_RELEASE=%s' % env['release'],
+	'MALI_USE_CSF=%s' % env['csf'],
 	'MALI_COVERAGE=%s' % env['coverage'],
 ]
 
 kbase = env.BuildKernelModule('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src,
                               make_args = make_args)
 
-# need Module.symvers from ump.ko build
-if int(env['ump']) == 1:
-	env.Depends(kbase, '$STATIC_LIB_PATH/ump.ko')
-
 if 'smc_protected_mode_switcher' in env:
 	env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/smc_protected_mode_switcher.ko')
 
diff --git a/drivers/gpu/arm/midgard/tests/Mconfig b/drivers/gpu/arm/midgard/tests/Mconfig
new file mode 100644
index 0000000..ddd7630
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/Mconfig
@@ -0,0 +1,27 @@
+#
+# (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+
+config UNIT_TEST_KERNEL_MODULES
+	bool
+	default y if UNIT_TEST_CODE && BUILD_KERNEL_MODULES
+	default n
+
+config BUILD_IPA_TESTS
+	bool
+	default y if UNIT_TEST_KERNEL_MODULES && MALI_DEVFREQ
+	default n
+
+config BUILD_CSF_TESTS
+	bool
+	default y if UNIT_TEST_KERNEL_MODULES && GPU_HAS_CSF
+	default n
diff --git a/drivers/gpu/arm/midgard/tests/build.bp b/drivers/gpu/arm/midgard/tests/build.bp
new file mode 100644
index 0000000..3107062
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/build.bp
@@ -0,0 +1,36 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2018 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_defaults {
+    name: "kernel_test_module_defaults",
+    defaults: ["mali_kbase_shared_config_defaults"],
+    include_dirs: [
+        "kernel/drivers/gpu/arm",
+        "kernel/drivers/gpu/arm/midgard",
+        "kernel/drivers/gpu/arm/midgard/backend/gpu",
+        "kernel/drivers/gpu/arm/midgard/tests/include",
+    ],
+}
+
+subdirs = [
+    "kutf",
+    "mali_kutf_irq_test",
+]
+
+optional_subdirs = [
+    "kutf_test",
+    "kutf_test_runner",
+    "mali_kutf_ipa_test",
+    "mali_kutf_ipa_unit_test",
+    "mali_kutf_vinstr_test",
+    "mali_kutf_fw_test",
+]
diff --git a/drivers/gpu/arm/midgard/tests/kutf/build.bp b/drivers/gpu/arm/midgard/tests/kutf/build.bp
new file mode 100644
index 0000000..960c8faa
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/kutf/build.bp
@@ -0,0 +1,31 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2018 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_kernel_module {
+    name: "kutf",
+    defaults: ["kernel_defaults"],
+    srcs: [
+        "Kbuild",
+        "kutf_helpers.c",
+        "kutf_helpers_user.c",
+        "kutf_mem.c",
+        "kutf_resultset.c",
+        "kutf_suite.c",
+        "kutf_utils.c",
+    ],
+    kbuild_options: ["CONFIG_MALI_KUTF=m"],
+    include_dirs: ["kernel/drivers/gpu/arm/midgard/tests/include"],
+    enabled: false,
+    base_build_kutf: {
+        enabled: true,
+    },
+}
diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
index e3dc5eb..9218a40 100644
--- a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2015, 2017-2018 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -29,13 +29,9 @@
 endif
 
 TEST_CCFLAGS := \
-	-DMALI_DEBUG=$(MALI_DEBUG) \
-	-DMALI_BACKEND_KERNEL=$(MALI_BACKEND_KERNEL) \
-	-DMALI_NO_MALI=$(MALI_NO_MALI) \
 	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
-	-DMALI_USE_UMP=$(MALI_USE_UMP) \
-	-DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
 	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_USE_CSF=$(MALI_USE_CSF) \
 	$(SCONS_CFLAGS) \
 	-I$(CURDIR)/../include \
 	-I$(CURDIR)/../../../../../../include \
diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp
new file mode 100644
index 0000000..a6669af
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp
@@ -0,0 +1,30 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2018 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_kernel_module {
+    name: "mali_kutf_irq_test",
+    defaults: ["kernel_test_module_defaults"],
+    srcs: [
+        "Kbuild",
+        "mali_kutf_irq_test_main.c",
+    ],
+    extra_symbols: [
+        "mali_kbase",
+        "kutf",
+    ],
+    install_group: "IG_tests",
+    enabled: false,
+    base_build_kutf: {
+        enabled: true,
+        kbuild_options: ["CONFIG_MALI_IRQ_LATENCY=m"],
+    },
+}
diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
index 5013a9d..4181b7f 100644
--- a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -90,15 +90,14 @@
 	struct kbase_device *kbdev = kbase_untag(data);
 	u32 val;
 
-	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS));
 	if (val & TEST_IRQ) {
 		struct timespec tval;
 
 		getnstimeofday(&tval);
 		irq_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec);
 
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val,
-				NULL);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
 
 		triggered = true;
 		wake_up(&wait);
@@ -194,7 +193,7 @@
 
 		/* Trigger fake IRQ */
 		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
-				TEST_IRQ, NULL);
+				TEST_IRQ);
 
 		ret = wait_event_timeout(wait, triggered != false, IRQ_TIMEOUT);
 
diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript
index be69514..76e37308 100644
--- a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript
+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -29,7 +29,7 @@
 	cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [])
 	env.KernelObjTarget('mali_kutf_irq_test', cmd)
 else:
-	makeAction=Action("cd ${SOURCE.dir} && make MALI_DEBUG=${debug} MALI_BACKEND_KERNEL=1 MALI_ERROR_INJECT_ON=${error_inject} MALI_NO_MALI=${no_mali} MALI_UNIT_TEST=${unit} MALI_USE_UMP=${ump} MALI_CUSTOMER_RELEASE=${release} %s && ( ( [ -f mali_kutf_irq_test.ko ] && cp mali_kutf_irq_test.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/mali_kutf_irq_test.ko)" % env.kernel_get_config_defines(), '$MAKECOMSTR')
+	makeAction=Action("cd ${SOURCE.dir} && make MALI_UNIT_TEST=${unit} MALI_CUSTOMER_RELEASE=${release} MALI_USE_CSF=${csf} %s && ( ( [ -f mali_kutf_irq_test.ko ] && cp mali_kutf_irq_test.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/mali_kutf_irq_test.ko)" % env.kernel_get_config_defines(), '$MAKECOMSTR')
 	cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [makeAction])
 	env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/kutf.ko')
 	env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/mali_kbase.ko')
diff --git a/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c
index 1690da4..3aab51a 100644
--- a/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c
+++ b/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c
@@ -49,12 +49,15 @@
  *                  alignment, length and limits for the allocation
  * @is_shader_code: True if the allocation is for shader code (which has
  *                  additional alignment requirements)
+ * @is_same_4gb_page: True if the allocation needs to reside completely within
+ *                    a 4GB chunk
  *
  * Return: true if gap_end is now aligned correctly and is still in range,
  *         false otherwise
  */
 static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
-		struct vm_unmapped_area_info *info, bool is_shader_code)
+		struct vm_unmapped_area_info *info, bool is_shader_code,
+		bool is_same_4gb_page)
 {
 	/* Compute highest gap address at the desired alignment */
 	(*gap_end) -= info->length;
@@ -72,6 +75,35 @@
 		if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end +
 				info->length) & BASE_MEM_MASK_4GB))
 			return false;
+	} else if (is_same_4gb_page) {
+		unsigned long start = *gap_end;
+		unsigned long end = *gap_end + info->length;
+		unsigned long mask = ~((unsigned long)U32_MAX);
+
+		/* Check if 4GB boundary is straddled */
+		if ((start & mask) != ((end - 1) & mask)) {
+			unsigned long offset = end - (end & mask);
+			/* This is to ensure that alignment doesn't get
+			 * disturbed in an attempt to prevent straddling the
+			 * 4GB boundary. The GPU VA is aligned to 2MB when the
+			 * allocation size is > 2MB and there is enough CPU &
+			 * GPU virtual space.
+			 */
+			unsigned long rounded_offset =
+					ALIGN(offset, info->align_mask + 1);
+
+			start -= rounded_offset;
+			end -= rounded_offset;
+
+			*gap_end = start;
+
+			/* The preceding 4GB boundary shall not get straddled,
+			 * even after accounting for the alignment, as the
+			 * size of the allocation is limited to 4GB and the
+			 * initial start location was already aligned.
+			 */
+			WARN_ON((start & mask) != ((end - 1) & mask));
+		}
 	}
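A standalone restatement of the fix-up above, with example numbers, may make the arithmetic easier to follow: the overshoot past the 4GB boundary is rounded up to the alignment step and the whole candidate window is shifted down by that amount, which removes the straddle without disturbing the 2MB alignment. The function below is an illustration only, not driver code:

#include <linux/kernel.h>	/* ALIGN(), U32_MAX */

/* e.g. gap_end = 0x7ffe00000, length = 64MB: end = 0x803e00000 crosses
 * 0x800000000, so the window is shifted down by ALIGN(0x3e00000, 2MB)
 * = 0x3e00000, giving [0x7fc000000, 0x800000000), still 2MB aligned
 * and entirely below the boundary. */
static unsigned long demo_confine_to_4gb(unsigned long gap_end,
		unsigned long length, unsigned long align_mask)
{
	unsigned long start = gap_end;
	unsigned long end = gap_end + length;
	unsigned long mask = ~((unsigned long)U32_MAX);

	if ((start & mask) != ((end - 1) & mask))
		start -= ALIGN(end - (end & mask), align_mask + 1);

	return start;
}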
 
 
@@ -89,6 +121,8 @@
  * @is_shader_code:    Boolean which denotes whether the allocated area is
  *                      intended for the use by shader core in which case a
  *                      special alignment requirements apply.
+ * @is_same_4gb_page: Boolean which indicates whether the allocated area needs
+ *                    to reside completely within a 4GB chunk.
  *
  * The unmapped_area_topdown() function in the Linux kernel is not exported
  * using EXPORT_SYMBOL_GPL macro. To allow us to call this function from a
@@ -97,25 +131,26 @@
  * of this function and prefixed it with 'kbase_'.
  *
  * The difference in the call parameter list comes from the fact that
- * kbase_unmapped_area_topdown() is called with additional parameter which
- * is provided to denote whether the allocation is for a shader core memory
- * or not. This is significant since the executable shader core memory has
- * additional alignment requirements.
+ * kbase_unmapped_area_topdown() is called with additional parameters which
+ * are provided to indicate whether the allocation is for shader core memory,
+ * which has additional alignment requirements, and whether the allocation can
+ * straddle a 4GB boundary.
  *
  * The modification of the original Linux function lies in how the computation
  * of the highest gap address at the desired alignment is performed once the
  * gap with desirable properties is found. For this purpose a special function
  * is introduced (@ref align_and_check()) which beside computing the gap end
- * at the desired alignment also performs additional alignment check for the
- * case when the memory is executable shader core memory. For such case, it is
- * ensured that the gap does not end on a 4GB boundary.
+ * at the desired alignment also performs additional alignment checks for the
+ * case when the memory is executable shader core memory, for which it is
+ * ensured that the gap does not end on a 4GB boundary, and for the case when
+ * memory needs to be confined within a 4GB chunk.
  *
  * Return: address of the found gap end (high limit) if area is found;
  *         -ENOMEM if search is unsuccessful
 */
 
 static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
-		*info, bool is_shader_code)
+		*info, bool is_shader_code, bool is_same_4gb_page)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
@@ -142,7 +177,8 @@
 	/* Check highest gap, which does not precede any rbtree node */
 	gap_start = mm->highest_vm_end;
 	if (gap_start <= high_limit) {
-		if (align_and_check(&gap_end, gap_start, info, is_shader_code))
+		if (align_and_check(&gap_end, gap_start, info,
+				is_shader_code, is_same_4gb_page))
 			return gap_end;
 	}
 
@@ -178,7 +214,7 @@
 				gap_end = info->high_limit;
 
 			if (align_and_check(&gap_end, gap_start, info,
-					is_shader_code))
+					is_shader_code, is_same_4gb_page))
 				return gap_end;
 		}
 
@@ -232,6 +268,7 @@
 	int gpu_pc_bits =
 	      kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
 	bool is_shader_code = false;
+	bool is_same_4gb_page = false;
 	unsigned long ret;
 
 	/* err on fixed address */
@@ -245,12 +282,13 @@
 
 	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
 
-		if (kbase_hw_has_feature(kctx->kbdev,
-						BASE_HW_FEATURE_33BIT_VA)) {
-			high_limit = kctx->same_va_end << PAGE_SHIFT;
-		} else {
-			high_limit = min_t(unsigned long, mm->mmap_base,
-					(kctx->same_va_end << PAGE_SHIFT));
+		high_limit = min_t(unsigned long, mm->mmap_base,
+				(kctx->same_va_end << PAGE_SHIFT));
+
+		/* If there is more than 33 bits of GPU VA space, align to
+		 * 2MB boundaries.
+		 */
+		if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
 			if (len >= SZ_2M) {
 				align_offset = SZ_2M;
 				align_mask = SZ_2M - 1;
@@ -290,6 +328,8 @@
 				align_mask = extent_bytes - 1;
 				align_offset =
 				      extent_bytes - (reg->initial_commit << PAGE_SHIFT);
+			} else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
+				is_same_4gb_page = true;
 			}
 #ifndef CONFIG_64BIT
 	} else {
@@ -305,7 +345,8 @@
 	info.align_offset = align_offset;
 	info.align_mask = align_mask;
 
-	ret = kbase_unmapped_area_topdown(&info, is_shader_code);
+	ret = kbase_unmapped_area_topdown(&info, is_shader_code,
+			is_same_4gb_page);
 
 	if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base &&
 			high_limit < (kctx->same_va_end << PAGE_SHIFT)) {
@@ -314,7 +355,8 @@
 		info.high_limit = min_t(u64, TASK_SIZE,
 					(kctx->same_va_end << PAGE_SHIFT));
 
-		ret = kbase_unmapped_area_topdown(&info, is_shader_code);
+		ret = kbase_unmapped_area_topdown(&info, is_shader_code,
+				is_same_4gb_page);
 	}
 
 	return ret;
diff --git a/drivers/gpu/drm/evdi/evdi_painter.c b/drivers/gpu/drm/evdi/evdi_painter.c
index 11e1b88..6b45c8ac 100644
--- a/drivers/gpu/drm/evdi/evdi_painter.c
+++ b/drivers/gpu/drm/evdi/evdi_painter.c
@@ -560,7 +560,7 @@
 	return 0;
 }
 
-static void evdi_painter_disconnect(struct evdi_device *evdi,
+static int evdi_painter_disconnect(struct evdi_device *evdi,
 	struct drm_file *file)
 {
 	struct evdi_painter *painter = evdi->painter;
@@ -575,9 +575,8 @@
 		     evdi->dev_index, file);
 		EVDI_WARN(" - ignoring\n");
 
-
 		painter_unlock(painter);
-		return;
+		return -EFAULT;
 	}
 
 	evdi_painter_set_new_scanout_buffer(evdi, NULL);
@@ -602,6 +601,7 @@
 	painter_unlock(painter);
 
 	drm_helper_hpd_irq_event(evdi->ddev);
+	return 0;
 }
 
 void evdi_painter_close(struct evdi_device *evdi, struct drm_file *file)
@@ -620,20 +620,21 @@
 	struct evdi_device *evdi = drm_dev->dev_private;
 	struct evdi_painter *painter = evdi->painter;
 	struct drm_evdi_connect *cmd = data;
+	int ret;
 
 	EVDI_CHECKPT();
 	if (painter) {
 		if (cmd->connected)
-			evdi_painter_connect(evdi,
+			ret = evdi_painter_connect(evdi,
 					     cmd->edid,
 					     cmd->edid_length,
 					     cmd->sku_area_limit,
 					     file,
 					     cmd->dev_index);
 		else
-			evdi_painter_disconnect(evdi, file);
+			ret = evdi_painter_disconnect(evdi, file);
 
-		return 0;
+		return ret;
 	}
 	EVDI_WARN("Painter does not exist!");
 	return -ENODEV;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c63d2824..4f2f2aa 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1675,6 +1675,9 @@
 	return obj->madv == I915_MADV_DONTNEED;
 }
 
+extern void mlock_vma_page(struct page *page);
+extern unsigned int munlock_vma_page(struct page *page);
+
 static void
 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 {
@@ -1708,6 +1711,10 @@
 		if (obj->madv == I915_MADV_WILLNEED)
 			mark_page_accessed(page);
 
+		lock_page(page);
+		munlock_vma_page(page);
+		unlock_page(page);
+
 		page_cache_release(page);
 	}
 	obj->dirty = 0;
@@ -1900,6 +1907,11 @@
 			gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
 			gfp &= ~(__GFP_IO | __GFP_WAIT);
 		}
+
+		lock_page(page);
+		mlock_vma_page(page);
+		unlock_page(page);
+
 #ifdef CONFIG_SWIOTLB
 		if (swiotlb_nr_tbl()) {
 			st->nents++;
@@ -1934,8 +1946,12 @@
 
 err_pages:
 	sg_mark_end(sg);
-	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
+	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
+		lock_page(page);
+		munlock_vma_page(page);
+		unlock_page(page);
 		page_cache_release(sg_page_iter_page(&sg_iter));
+	}
 	sg_free_table(st);
 	kfree(st);
 	return PTR_ERR(page);
diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
index 91088b8..2f6c5317 100644
--- a/drivers/i2c/busses/i2c-qup.c
+++ b/drivers/i2c/busses/i2c-qup.c
@@ -343,9 +343,12 @@
 		}
 
 		if (qup->bus_err || qup->qup_err) {
-			if (qup->bus_err & QUP_I2C_NACK_FLAG)
+			if (qup->bus_err & QUP_I2C_NACK_FLAG) {
 				dev_err(qup->dev, "NACK from %x\n", msg->addr);
-			ret = -EIO;
+				ret = -EAGAIN;
+			} else {
+				ret = -EIO;
+			}
 			goto err;
 		}
 	} while (qup->pos < msg->len);
@@ -482,9 +485,12 @@
 		}
 
 		if (qup->bus_err || qup->qup_err) {
-			if (qup->bus_err & QUP_I2C_NACK_FLAG)
+			if (qup->bus_err & QUP_I2C_NACK_FLAG) {
 				dev_err(qup->dev, "NACK from %x\n", msg->addr);
-			ret = -EIO;
+				ret = -EAGAIN;
+			} else {
+				ret = -EIO;
+			}
 			goto err;
 		}
 
@@ -700,6 +706,7 @@
 	dev_dbg(qup->dev, "IN:block:%d, fifo:%d, OUT:block:%d, fifo:%d\n",
 		qup->in_blk_sz, qup->in_fifo_sz,
 		qup->out_blk_sz, qup->out_fifo_sz);
+	qup->adap.retries = 3;
 
 	pm_runtime_set_autosuspend_delay(qup->dev, MSEC_PER_SEC);
 	pm_runtime_use_autosuspend(qup->dev);
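The switch to -EAGAIN for NACKs works together with the new qup->adap.retries = 3: the i2c core only retries a transfer when the adapter's master_xfer returns -EAGAIN, so addressing a momentarily busy or absent device is retried up to three more times while genuine bus errors still fail immediately with -EIO. Roughly, as a simplified paraphrase of the core's __i2c_transfer() loop rather than a verbatim copy:

#include <linux/i2c.h>
#include <linux/jiffies.h>

/* Simplified paraphrase of the retry handling in the i2c core. */
static int demo_i2c_transfer_retry(struct i2c_adapter *adap,
		struct i2c_msg *msgs, int num)
{
	unsigned long orig_jiffies = jiffies;
	int ret, try;

	for (ret = 0, try = 0; try <= adap->retries; try++) {
		ret = adap->algo->master_xfer(adap, msgs, num);
		if (ret != -EAGAIN)
			break;		/* success, or a non-retryable error */
		if (time_after(jiffies, orig_jiffies + adap->timeout))
			break;		/* still NACKing: give up */
	}
	return ret;
}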
diff --git a/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_vp8e.c b/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_vp8e.c
index ce02712..cd0a519 100644
--- a/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_vp8e.c
+++ b/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_vp8e.c
@@ -183,14 +183,18 @@
 static void rk3288_vpu_vp8e_set_buffers(struct rk3288_vpu_dev *vpu,
 					struct rk3288_vpu_ctx *ctx)
 {
+	const u32 src_addr_regs[] = { VEPU_REG_ADDR_IN_LUMA,
+				      VEPU_REG_ADDR_IN_CB,
+				      VEPU_REG_ADDR_IN_CR };
 	const struct rk3288_vp8e_reg_params *params = ctx->run.vp8e.reg_params;
+	struct v4l2_pix_format_mplane *src_fmt = &ctx->src_fmt;
 	dma_addr_t ref_buf_dma, rec_buf_dma;
 	dma_addr_t stream_dma;
 	size_t rounded_size;
 	dma_addr_t dst_dma;
 	u32 start_offset;
 	size_t dst_size;
-
+	int i;
+
 	rounded_size = ref_luma_size(ctx->src_fmt.width,
 						ctx->src_fmt.height);
 
@@ -279,15 +283,19 @@
 		vepu_write_relaxed(vpu, vpu->dummy_encode_src[PLANE_CR].dma,
 					VEPU_REG_ADDR_IN_CR);
 	} else {
-		vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(
-					&ctx->run.src->b, PLANE_Y),
-					VEPU_REG_ADDR_IN_LUMA);
-		vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(
-					&ctx->run.src->b, PLANE_CB),
-					VEPU_REG_ADDR_IN_CB);
-		vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(
-					&ctx->run.src->b, PLANE_CR),
-					VEPU_REG_ADDR_IN_CR);
+		/*
+		 * TODO(crbug.com/901264): The way to pass an offset within a
+		 * DMA-buf is not defined in V4L2 specification, so we abuse
+		 * data_offset for now. Fix it when we have the right interface,
+		 * including any necessary validation and potential alignment
+		 * issues.
+		 */
+		for (i = 0; i < src_fmt->num_planes; ++i)
+			vepu_write_relaxed(
+				vpu, vb2_dma_contig_plane_dma_addr(
+					&ctx->run.src->b, i) +
+				ctx->run.src->b.v4l2_planes[i].data_offset,
+				src_addr_regs[i]);
 	}
 
 	/* Source parameters. */
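The data_offset the driver now adds to each plane address is the per-plane offset user space supplies at VIDIOC_QBUF time when importing DMA-bufs. A hedged sketch of the queueing side under that assumption (plain V4L2 multi-planar API; the fds, offsets and plane count are placeholders):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>

/* Queue one 3-plane DMA-buf frame; only the fields relevant to the
 * offset handling are shown. */
static int demo_qbuf_with_offsets(int video_fd, const int dmabuf_fd[3],
		const unsigned int offset[3], const unsigned int length[3])
{
	struct v4l2_plane planes[3];
	struct v4l2_buffer buf;
	int i;

	memset(planes, 0, sizeof(planes));
	memset(&buf, 0, sizeof(buf));

	for (i = 0; i < 3; i++) {
		planes[i].m.fd = dmabuf_fd[i];
		planes[i].length = length[i];
		planes[i].data_offset = offset[i];	/* start of payload */
	}

	buf.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
	buf.memory = V4L2_MEMORY_DMABUF;
	buf.index = 0;
	buf.m.planes = planes;
	buf.length = 3;					/* number of planes */

	return ioctl(video_fd, VIDIOC_QBUF, &buf);
}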
diff --git a/drivers/media/usb/uvc/uvc_isight.c b/drivers/media/usb/uvc/uvc_isight.c
index 8510e725..363249e 100644
--- a/drivers/media/usb/uvc/uvc_isight.c
+++ b/drivers/media/usb/uvc/uvc_isight.c
@@ -99,9 +99,10 @@
 	return 0;
 }
 
-void uvc_video_decode_isight(struct urb *urb, struct uvc_streaming *stream,
-		struct uvc_buffer *buf)
+void uvc_video_decode_isight(struct uvc_urb *uvc_urb, struct uvc_buffer *buf)
 {
+	struct urb *urb = uvc_urb->urb;
+	struct uvc_streaming *stream = uvc_urb->stream;
 	int ret, i;
 
 	for (i = 0; i < urb->number_of_packets; ++i) {
diff --git a/drivers/media/usb/uvc/uvc_queue.c b/drivers/media/usb/uvc/uvc_queue.c
index 634784d..ada5559 100644
--- a/drivers/media/usb/uvc/uvc_queue.c
+++ b/drivers/media/usb/uvc/uvc_queue.c
@@ -119,6 +119,7 @@
 
 	spin_lock_irqsave(&queue->irqlock, flags);
 	if (likely(!(queue->flags & UVC_QUEUE_DISCONNECTED))) {
+		kref_init(&buf->ref);
 		list_add_tail(&buf->queue, &queue->irqqueue);
 	} else {
 		/* If the device is disconnected return the buffer to userspace
@@ -159,7 +160,6 @@
 {
 	struct uvc_video_queue *queue = vb2_get_drv_priv(vq);
 	struct uvc_streaming *stream = uvc_queue_to_stream(queue);
-	unsigned long flags;
 	int ret;
 
 	queue->buf_used = 0;
@@ -168,9 +168,9 @@
 	if (ret == 0)
 		return 0;
 
-	spin_lock_irqsave(&queue->irqlock, flags);
+	spin_lock_irq(&queue->irqlock);
 	uvc_queue_return_buffers(queue, UVC_BUF_STATE_QUEUED);
-	spin_unlock_irqrestore(&queue->irqlock, flags);
+	spin_unlock_irq(&queue->irqlock);
 
 	return ret;
 }
@@ -179,13 +179,12 @@
 {
 	struct uvc_video_queue *queue = vb2_get_drv_priv(vq);
 	struct uvc_streaming *stream = uvc_queue_to_stream(queue);
-	unsigned long flags;
 
 	uvc_video_enable(stream, 0);
 
-	spin_lock_irqsave(&queue->irqlock, flags);
+	spin_lock_irq(&queue->irqlock);
 	uvc_queue_return_buffers(queue, UVC_BUF_STATE_ERROR);
-	spin_unlock_irqrestore(&queue->irqlock, flags);
+	spin_unlock_irq(&queue->irqlock);
 }
 
 static struct vb2_ops uvc_queue_qops = {
@@ -409,32 +408,93 @@
 	spin_unlock_irqrestore(&queue->irqlock, flags);
 }
 
+/*
+ * uvc_queue_get_current_buffer: Obtain the current working output buffer
+ *
+ * Buffers may span multiple packets and even URBs, so the active buffer
+ * remains on the queue until the EOF marker.
+ */
+static struct uvc_buffer *
+__uvc_queue_get_current_buffer(struct uvc_video_queue *queue)
+{
+	if (list_empty(&queue->irqqueue))
+		return NULL;
+
+	return list_first_entry(&queue->irqqueue, struct uvc_buffer, queue);
+}
+
+struct uvc_buffer *uvc_queue_get_current_buffer(struct uvc_video_queue *queue)
+{
+	struct uvc_buffer *nextbuf;
+	unsigned long flags;
+
+	spin_lock_irqsave(&queue->irqlock, flags);
+	nextbuf = __uvc_queue_get_current_buffer(queue);
+	spin_unlock_irqrestore(&queue->irqlock, flags);
+
+	return nextbuf;
+}
+
+/*
+ * uvc_queue_requeue: Requeue a buffer on our internal irqqueue
+ *
+ * Reuse a buffer through our internal queue without the need to 'prepare'.
+ * The buffer will be returned to userspace through the uvc_buffer_queue call
+ * if the device has been disconnected.
+ */
+static void uvc_queue_requeue(struct uvc_video_queue *queue,
+		struct uvc_buffer *buf)
+{
+	buf->error = 0;
+	buf->state = UVC_BUF_STATE_QUEUED;
+	buf->bytesused = 0;
+	vb2_set_plane_payload(&buf->buf, 0, 0);
+
+	uvc_buffer_queue(&buf->buf);
+}
+
+static void uvc_queue_buffer_complete(struct kref *ref)
+{
+	struct uvc_buffer *buf = container_of(ref, struct uvc_buffer, ref);
+	struct vb2_buffer *vb = &buf->buf;
+	struct uvc_video_queue *queue = vb2_get_drv_priv(vb->vb2_queue);
+
+	if ((queue->flags & UVC_QUEUE_DROP_CORRUPTED) && buf->error) {
+		uvc_queue_requeue(queue, buf);
+		return;
+	}
+
+	buf->state = buf->error ? UVC_BUF_STATE_ERROR : UVC_BUF_STATE_DONE;
+	vb2_set_plane_payload(&buf->buf, 0, buf->bytesused);
+	vb2_buffer_done(&buf->buf, VB2_BUF_STATE_DONE);
+}
+
+/*
+ * Release a reference on the buffer. Complete the buffer when the last
+ * reference is released.
+ */
+void uvc_queue_buffer_release(struct uvc_buffer *buf)
+{
+	kref_put(&buf->ref, uvc_queue_buffer_complete);
+}
+
+/*
+ * Remove this buffer from the queue. Lifetime will persist while async actions
+ * are still running (if any), and uvc_queue_buffer_release will give the buffer
+ * back to VB2 when all users have completed.
+ */
 struct uvc_buffer *uvc_queue_next_buffer(struct uvc_video_queue *queue,
 		struct uvc_buffer *buf)
 {
 	struct uvc_buffer *nextbuf;
 	unsigned long flags;
 
-	if ((queue->flags & UVC_QUEUE_DROP_CORRUPTED) && buf->error) {
-		buf->error = 0;
-		buf->state = UVC_BUF_STATE_QUEUED;
-		buf->bytesused = 0;
-		vb2_set_plane_payload(&buf->buf, 0, 0);
-		return buf;
-	}
-
 	spin_lock_irqsave(&queue->irqlock, flags);
 	list_del(&buf->queue);
-	if (!list_empty(&queue->irqqueue))
-		nextbuf = list_first_entry(&queue->irqqueue, struct uvc_buffer,
-					   queue);
-	else
-		nextbuf = NULL;
+	nextbuf = __uvc_queue_get_current_buffer(queue);
 	spin_unlock_irqrestore(&queue->irqlock, flags);
 
-	buf->state = buf->error ? VB2_BUF_STATE_ERROR : UVC_BUF_STATE_DONE;
-	vb2_set_plane_payload(&buf->buf, 0, buf->bytesused);
-	vb2_buffer_done(&buf->buf, VB2_BUF_STATE_DONE);
+	uvc_queue_buffer_release(buf);
 
 	return nextbuf;
 }
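Taken together with the uvc_video.c changes further down, the buffer lifetime now works as follows: a reference is initialised when the buffer is queued on the irqqueue, every deferred copy operation takes an extra reference before the URB is handed to the work item, and the buffer is only completed to vb2 once the dequeue path and all copy operations have dropped their references via uvc_queue_buffer_release(). A compressed, purely illustrative restatement of that flow:

/* Illustrative flow only, condensed from uvc_buffer_queue(),
 * uvc_video_decode_data() and uvc_video_copy_data_work(). */
static void demo_uvc_buffer_lifetime(struct uvc_buffer *buf,
		struct uvc_copy_op *op)
{
	kref_init(&buf->ref);		/* buffer enters the irqqueue      */

	kref_get(&buf->ref);		/* one reference per deferred copy */
	op->buf = buf;

	/* later, in the work item, after memcpy(op->dst, op->src, op->len): */
	uvc_queue_buffer_release(op->buf);

	/* and when the buffer leaves the queue at end of frame: */
	uvc_queue_buffer_release(buf);	/* last ref -> vb2_buffer_done()   */
}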
diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c
index f328b6d..6fb890c 100644
--- a/drivers/media/usb/uvc/uvc_video.c
+++ b/drivers/media/usb/uvc/uvc_video.c
@@ -363,36 +363,15 @@
 
 static void
 uvc_video_clock_decode(struct uvc_streaming *stream, struct uvc_buffer *buf,
-		       const __u8 *data, int len)
+		       const __u8 *data, int len, unsigned int header_size,
+		       bool has_pts, bool has_scr)
 {
 	struct uvc_clock_sample *sample;
-	unsigned int header_size;
-	bool has_pts = false;
-	bool has_scr = false;
 	unsigned long flags;
 	struct timespec ts;
 	u16 host_sof;
 	u16 dev_sof;
 
-	switch (data[1] & (UVC_STREAM_PTS | UVC_STREAM_SCR)) {
-	case UVC_STREAM_PTS | UVC_STREAM_SCR:
-		header_size = 12;
-		has_pts = true;
-		has_scr = true;
-		break;
-	case UVC_STREAM_PTS:
-		header_size = 6;
-		has_pts = true;
-		break;
-	case UVC_STREAM_SCR:
-		header_size = 8;
-		has_scr = true;
-		break;
-	default:
-		header_size = 2;
-		break;
-	}
-
 	/* Check for invalid headers. */
 	if (len < header_size)
 		return;
@@ -698,11 +677,10 @@
  */
 
 static void uvc_video_stats_decode(struct uvc_streaming *stream,
-		const __u8 *data, int len)
+				   const __u8 *data, int len,
+				   unsigned int header_size, bool has_pts,
+				   bool has_scr)
 {
-	unsigned int header_size;
-	bool has_pts = false;
-	bool has_scr = false;
 	u16 uninitialized_var(scr_sof);
 	u32 uninitialized_var(scr_stc);
 	u32 uninitialized_var(pts);
@@ -711,25 +689,6 @@
 	    stream->stats.frame.nb_packets == 0)
 		ktime_get_ts(&stream->stats.stream.start_ts);
 
-	switch (data[1] & (UVC_STREAM_PTS | UVC_STREAM_SCR)) {
-	case UVC_STREAM_PTS | UVC_STREAM_SCR:
-		header_size = 12;
-		has_pts = true;
-		has_scr = true;
-		break;
-	case UVC_STREAM_PTS:
-		header_size = 6;
-		has_pts = true;
-		break;
-	case UVC_STREAM_SCR:
-		header_size = 8;
-		has_scr = true;
-		break;
-	default:
-		header_size = 2;
-		break;
-	}
-
 	/* Check for invalid headers. */
 	if (len < header_size || data[0] < header_size) {
 		stream->stats.frame.nb_invalid++;
@@ -947,10 +906,42 @@
  * to be called with a NULL buf parameter. uvc_video_decode_data and
  * uvc_video_decode_end will never be called with a NULL buffer.
  */
+
+static void uvc_video_decode_header_size(const __u8 *data, int *header_size,
+					 bool *has_pts, bool *has_scr)
+{
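+	/* 2 mandatory header bytes, plus 4 for the PTS and 6 for the SCR. */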
+	switch (data[1] & (UVC_STREAM_PTS | UVC_STREAM_SCR)) {
+	case UVC_STREAM_PTS | UVC_STREAM_SCR:
+		*header_size = 12;
+		*has_pts = true;
+		*has_scr = true;
+		break;
+	case UVC_STREAM_PTS:
+		*header_size = 6;
+		*has_pts = true;
+		break;
+	case UVC_STREAM_SCR:
+		*header_size = 8;
+		*has_scr = true;
+		break;
+	default:
+		*header_size = 2;
+	}
+}
+
 static int uvc_video_decode_start(struct uvc_streaming *stream,
-		struct uvc_buffer *buf, const __u8 *data, int len)
+				  struct uvc_buffer *buf, const __u8 *urb_data,
+				  int len)
 {
 	__u8 fid;
+	__u8 data[12];
+	unsigned int header_size;
+	bool has_pts = false, has_scr = false;
+
+	/* Cache the header since urb_data is uncached memory. The header
+	 * is at most 12 bytes long.
+	 */
+	memcpy(data, urb_data, min(len, 12));
 
 	/* Sanity checks:
 	 * - packet must be at least 2 bytes long
@@ -973,8 +964,12 @@
 			uvc_video_stats_update(stream);
 	}
 
-	uvc_video_clock_decode(stream, buf, data, len);
-	uvc_video_stats_decode(stream, data, len);
+	uvc_video_decode_header_size(data, &header_size, &has_pts, &has_scr);
+
+	uvc_video_clock_decode(stream, buf, data, len, header_size, has_pts,
+			       has_scr);
+	uvc_video_stats_decode(stream, data, len, header_size, has_pts,
+			       has_scr);
 
 	/* Store the payload FID bit and return immediately when the buffer is
 	 * NULL.
@@ -1053,27 +1048,62 @@
 	return data[0];
 }
 
-static void uvc_video_decode_data(struct uvc_streaming *stream,
-		struct uvc_buffer *buf, const __u8 *data, int len)
+/*
+ * uvc_video_copy_data_work: Asynchronous memcpy processing
+ *
+ * Perform memcpy tasks in process context, releasing the buffer references
+ * taken for each copy and resubmitting the URB when all copies are done.
+ */
+static void uvc_video_copy_data_work(struct work_struct *work)
 {
-	unsigned int maxlen, nbytes;
-	void *mem;
+	struct uvc_urb *uvc_urb = container_of(work, struct uvc_urb, work);
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < uvc_urb->async_operations; i++) {
+		struct uvc_copy_op *op = &uvc_urb->copy_operations[i];
+
+		memcpy(op->dst, op->src, op->len);
+
+		/* Release reference taken on this buffer */
+		uvc_queue_buffer_release(op->buf);
+	}
+
+	ret = usb_submit_urb(uvc_urb->urb, GFP_ATOMIC);
+	if (ret < 0)
+		uvc_printk(KERN_ERR, "Failed to resubmit video URB (%d).\n",
+			   ret);
+}
+
+static void uvc_video_decode_data(struct uvc_urb *uvc_urb,
+		struct uvc_buffer *buf, const u8 *data, int len)
+{
+	unsigned int active_op = uvc_urb->async_operations;
+	struct uvc_copy_op *decode = &uvc_urb->copy_operations[active_op];
+	unsigned int maxlen;
 
 	if (len <= 0)
 		return;
 
-	/* Copy the video data to the buffer. */
 	maxlen = buf->length - buf->bytesused;
-	mem = buf->mem + buf->bytesused;
-	nbytes = min((unsigned int)len, maxlen);
-	memcpy(mem, data, nbytes);
-	buf->bytesused += nbytes;
+
+	/* Take a buffer reference for async work */
+	kref_get(&buf->ref);
+
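+	/* Record the copy; it is performed later by uvc_video_copy_data_work(). */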
+	decode->buf = buf;
+	decode->src = data;
+	decode->dst = buf->mem + buf->bytesused;
+	decode->len = min_t(unsigned int, len, maxlen);
+
+	buf->bytesused += decode->len;
 
 	/* Complete the current frame if the buffer size was exceeded. */
 	if (len > maxlen) {
 		uvc_trace(UVC_TRACE_FRAME, "Frame complete (overflow).\n");
 		buf->state = UVC_BUF_STATE_READY;
 	}
+
+	uvc_urb->async_operations++;
 }
 
 static void uvc_video_decode_end(struct uvc_streaming *stream,
@@ -1136,9 +1166,11 @@
 /*
  * Completion handler for video URBs.
  */
-static void uvc_video_decode_isoc(struct urb *urb, struct uvc_streaming *stream,
+static void uvc_video_decode_isoc(struct uvc_urb *uvc_urb,
 	struct uvc_buffer *buf)
 {
+	struct urb *urb = uvc_urb->urb;
+	struct uvc_streaming *stream = uvc_urb->stream;
 	u8 *mem;
 	int ret, i;
 
@@ -1166,7 +1198,7 @@
 			continue;
 
 		/* Decode the payload data. */
-		uvc_video_decode_data(stream, buf, mem + ret,
+		uvc_video_decode_data(uvc_urb, buf, mem + ret,
 			urb->iso_frame_desc[i].actual_length - ret);
 
 		/* Process the header again. */
@@ -1184,9 +1216,11 @@
 	}
 }
 
-static void uvc_video_decode_bulk(struct urb *urb, struct uvc_streaming *stream,
+static void uvc_video_decode_bulk(struct uvc_urb *uvc_urb,
 	struct uvc_buffer *buf)
 {
+	struct urb *urb = uvc_urb->urb;
+	struct uvc_streaming *stream = uvc_urb->stream;
 	u8 *mem;
 	int len, ret;
 
@@ -1229,9 +1263,9 @@
 	 * sure buf is never dereferenced if NULL.
 	 */
 
-	/* Process video data. */
+	/* Prepare video data for processing. */
 	if (!stream->bulk.skip_payload && buf != NULL)
-		uvc_video_decode_data(stream, buf, mem, len);
+		uvc_video_decode_data(uvc_urb, buf, mem, len);
 
 	/* Detect the payload end by a URB smaller than the maximum size (or
 	 * a payload size equal to the maximum) and process the header again.
@@ -1252,9 +1286,11 @@
 	}
 }
 
-static void uvc_video_encode_bulk(struct urb *urb, struct uvc_streaming *stream,
+static void uvc_video_encode_bulk(struct uvc_urb *uvc_urb,
 	struct uvc_buffer *buf)
 {
+	struct urb *urb = uvc_urb->urb;
+	struct uvc_streaming *stream = uvc_urb->stream;
 	u8 *mem = urb->transfer_buffer;
 	int len = stream->urb_size, ret;
 
@@ -1297,10 +1333,10 @@
 
 static void uvc_video_complete(struct urb *urb)
 {
-	struct uvc_streaming *stream = urb->context;
+	struct uvc_urb *uvc_urb = urb->context;
+	struct uvc_streaming *stream = uvc_urb->stream;
 	struct uvc_video_queue *queue = &stream->queue;
 	struct uvc_buffer *buf = NULL;
-	unsigned long flags;
 	int ret;
 
 	switch (urb->status) {
@@ -1311,7 +1347,7 @@
 		uvc_printk(KERN_WARNING, "Non-zero status (%d) in video "
 			"completion handler.\n", urb->status);
 
-	case -ENOENT:		/* usb_kill_urb() called. */
+	case -ENOENT:		/* usb_poison_urb() called. */
 		if (stream->frozen)
 			return;
 
@@ -1321,18 +1357,29 @@
 		return;
 	}
 
-	spin_lock_irqsave(&queue->irqlock, flags);
-	if (!list_empty(&queue->irqqueue))
-		buf = list_first_entry(&queue->irqqueue, struct uvc_buffer,
-				       queue);
-	spin_unlock_irqrestore(&queue->irqlock, flags);
+	buf = uvc_queue_get_current_buffer(queue);
 
-	stream->decode(urb, stream, buf);
+	/* Re-initialise the URB async work. */
+	uvc_urb->async_operations = 0;
 
-	if ((ret = usb_submit_urb(urb, GFP_ATOMIC)) < 0) {
-		uvc_printk(KERN_ERR, "Failed to resubmit video URB (%d).\n",
-			ret);
+	/*
+	 * Process the URB headers, and optionally queue expensive memcpy tasks
+	 * to be deferred to a work queue.
+	 */
+	stream->decode(uvc_urb, buf);
+
+	/* If no async work is needed, resubmit the URB immediately. */
+	if (!uvc_urb->async_operations) {
+		ret = usb_submit_urb(uvc_urb->urb, GFP_ATOMIC);
+		if (ret < 0)
+			uvc_printk(KERN_ERR,
+				   "Failed to resubmit video URB (%d).\n",
+				   ret);
+		return;
 	}
+
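+	/* Defer the copies to the work handler, which also resubmits the URB. */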
+	INIT_WORK(&uvc_urb->work, uvc_video_copy_data_work);
+	queue_work(stream->async_wq, &uvc_urb->work);
 }
 
 /*
@@ -1343,14 +1390,16 @@
 	unsigned int i;
 
 	for (i = 0; i < UVC_URBS; ++i) {
-		if (stream->urb_buffer[i]) {
+		struct uvc_urb *uvc_urb = &stream->uvc_urb[i];
+
+		if (uvc_urb->buffer) {
 #ifndef CONFIG_DMA_NONCOHERENT
 			usb_free_coherent(stream->dev->udev, stream->urb_size,
-				stream->urb_buffer[i], stream->urb_dma[i]);
+					uvc_urb->buffer, uvc_urb->dma);
 #else
-			kfree(stream->urb_buffer[i]);
+			kfree(uvc_urb->buffer);
 #endif
-			stream->urb_buffer[i] = NULL;
+			uvc_urb->buffer = NULL;
 		}
 	}
 
@@ -1388,19 +1437,23 @@
 	/* Retry allocations until one succeed. */
 	for (; npackets > 1; npackets /= 2) {
 		for (i = 0; i < UVC_URBS; ++i) {
+			struct uvc_urb *uvc_urb = &stream->uvc_urb[i];
+
 			stream->urb_size = psize * npackets;
 #ifndef CONFIG_DMA_NONCOHERENT
-			stream->urb_buffer[i] = usb_alloc_coherent(
+			uvc_urb->buffer = usb_alloc_coherent(
 				stream->dev->udev, stream->urb_size,
-				gfp_flags | __GFP_NOWARN, &stream->urb_dma[i]);
+				gfp_flags | __GFP_NOWARN, &uvc_urb->dma);
 #else
-			stream->urb_buffer[i] =
+			uvc_urb->buffer =
 			    kmalloc(stream->urb_size, gfp_flags | __GFP_NOWARN);
 #endif
-			if (!stream->urb_buffer[i]) {
+			if (!uvc_urb->buffer) {
 				uvc_free_urb_buffers(stream);
 				break;
 			}
+
+			uvc_urb->stream = stream;
 		}
 
 		if (i == UVC_URBS) {
@@ -1421,23 +1474,30 @@
  */
 static void uvc_uninit_video(struct uvc_streaming *stream, int free_buffers)
 {
-	struct urb *urb;
-	unsigned int i;
+	struct uvc_urb *uvc_urb;
 
 	uvc_video_stats_stop(stream);
 
-	for (i = 0; i < UVC_URBS; ++i) {
-		urb = stream->urb[i];
-		if (urb == NULL)
-			continue;
+	/*
+	 * We must poison the URBs rather than kill them to ensure that even
+	 * after the completion handler returns, any asynchronous workqueues
+	 * will be prevented from resubmitting the URBs.
+	 */
+	for_each_uvc_urb(uvc_urb, stream)
+		usb_poison_urb(uvc_urb->urb);
 
-		usb_kill_urb(urb);
-		usb_free_urb(urb);
-		stream->urb[i] = NULL;
+	flush_workqueue(stream->async_wq);
+
+	for_each_uvc_urb(uvc_urb, stream) {
+		usb_free_urb(uvc_urb->urb);
+		uvc_urb->urb = NULL;
 	}
 
-	if (free_buffers)
+	if (free_buffers) {
 		uvc_free_urb_buffers(stream);
+		destroy_workqueue(stream->async_wq);
+		stream->async_wq = NULL;
+	}
 }
 
 /*
@@ -1482,6 +1542,8 @@
 	size = npackets * psize;
 
 	for (i = 0; i < UVC_URBS; ++i) {
+		struct uvc_urb *uvc_urb = &stream->uvc_urb[i];
+
 		urb = usb_alloc_urb(npackets, gfp_flags);
 		if (urb == NULL) {
 			uvc_uninit_video(stream, 1);
@@ -1489,17 +1551,17 @@
 		}
 
 		urb->dev = stream->dev->udev;
-		urb->context = stream;
+		urb->context = uvc_urb;
 		urb->pipe = usb_rcvisocpipe(stream->dev->udev,
 				ep->desc.bEndpointAddress);
 #ifndef CONFIG_DMA_NONCOHERENT
 		urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP;
-		urb->transfer_dma = stream->urb_dma[i];
+		urb->transfer_dma = uvc_urb->dma;
 #else
 		urb->transfer_flags = URB_ISO_ASAP;
 #endif
 		urb->interval = ep->desc.bInterval;
-		urb->transfer_buffer = stream->urb_buffer[i];
+		urb->transfer_buffer = uvc_urb->buffer;
 		urb->complete = uvc_video_complete;
 		urb->number_of_packets = npackets;
 		urb->transfer_buffer_length = size;
@@ -1509,7 +1571,7 @@
 			urb->iso_frame_desc[j].length = psize;
 		}
 
-		stream->urb[i] = urb;
+		uvc_urb->urb = urb;
 	}
 
 	return 0;
@@ -1548,21 +1610,22 @@
 		size = 0;
 
 	for (i = 0; i < UVC_URBS; ++i) {
+		struct uvc_urb *uvc_urb = &stream->uvc_urb[i];
+
 		urb = usb_alloc_urb(0, gfp_flags);
 		if (urb == NULL) {
 			uvc_uninit_video(stream, 1);
 			return -ENOMEM;
 		}
 
-		usb_fill_bulk_urb(urb, stream->dev->udev, pipe,
-			stream->urb_buffer[i], size, uvc_video_complete,
-			stream);
+		usb_fill_bulk_urb(urb, stream->dev->udev, pipe, uvc_urb->buffer,
+				  size, uvc_video_complete, uvc_urb);
 #ifndef CONFIG_DMA_NONCOHERENT
 		urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP;
-		urb->transfer_dma = stream->urb_dma[i];
+		urb->transfer_dma = uvc_urb->dma;
 #endif
 
-		stream->urb[i] = urb;
+		uvc_urb->urb = urb;
 	}
 
 	return 0;
@@ -1586,6 +1649,13 @@
 
 	uvc_video_stats_start(stream);
 
+	if (!stream->async_wq) {
+		stream->async_wq = alloc_workqueue("uvcvideo",
+						   WQ_UNBOUND | WQ_HIGHPRI, 0);
+		if (!stream->async_wq)
+			return -ENOMEM;
+	}
+
 	if (intf->num_altsetting > 1) {
 		struct usb_host_endpoint *best_ep = NULL;
 		unsigned int best_psize = UINT_MAX;
@@ -1653,7 +1723,9 @@
 
 	/* Submit the URBs. */
 	for (i = 0; i < UVC_URBS; ++i) {
-		ret = usb_submit_urb(stream->urb[i], gfp_flags);
+		struct uvc_urb *uvc_urb = &stream->uvc_urb[i];
+
+		ret = usb_submit_urb(uvc_urb->urb, gfp_flags);
 		if (ret < 0) {
 			uvc_printk(KERN_ERR, "Failed to submit URB %u "
 					"(%d).\n", i, ret);
diff --git a/drivers/media/usb/uvc/uvcvideo.h b/drivers/media/usb/uvc/uvcvideo.h
index 5d13e5c..d347083 100644
--- a/drivers/media/usb/uvc/uvcvideo.h
+++ b/drivers/media/usb/uvc/uvcvideo.h
@@ -351,6 +351,9 @@
 	unsigned int bytesused;
 
 	u32 pts;
+
+	/* asynchronous buffer handling */
+	struct kref ref;
 };
 
 #define UVC_QUEUE_DISCONNECTED		(1 << 0)
@@ -426,6 +429,44 @@
 	unsigned int max_sof;		/* Maximum STC.SOF value */
 };
 
+/**
+ * struct uvc_copy_op - Context structure to schedule an asynchronous memcpy
+ *
+ * @buf: active buf object for this operation
+ * @dst: copy destination address
+ * @src: copy source address
+ * @len: copy length
+ */
+struct uvc_copy_op {
+	struct uvc_buffer *buf;
+	void *dst;
+	const __u8 *src;
+	size_t len;
+};
+
+/**
+ * struct uvc_urb - URB context management structure
+ *
+ * @urb: the URB described by this context structure
+ * @stream: UVC streaming context
+ * @buffer: memory storage for the URB
+ * @dma: DMA coherent addressing for the buffer
+ * @async_operations: number of queued asynchronous copy operations
+ * @copy_operations: work descriptors for asynchronous copy operations
+ * @work: work queue entry for asynchronous decode
+ */
+struct uvc_urb {
+	struct urb *urb;
+	struct uvc_streaming *stream;
+
+	char *buffer;
+	dma_addr_t dma;
+
+	unsigned int async_operations;
+	struct uvc_copy_op copy_operations[UVC_MAX_PACKETS];
+	struct work_struct work;
+};
+
 struct uvc_streaming {
 	struct list_head list;
 	struct uvc_device *dev;
@@ -455,8 +496,8 @@
 	/* Buffers queue. */
 	unsigned int frozen : 1;
 	struct uvc_video_queue queue;
-	void (*decode) (struct urb *urb, struct uvc_streaming *video,
-			struct uvc_buffer *buf);
+	struct workqueue_struct *async_wq;
+	void (*decode)(struct uvc_urb *urb, struct uvc_buffer *buf);
 
 	/* Context data used by the bulk completion handler. */
 	struct {
@@ -467,9 +508,7 @@
 		__u32 max_payload_size;
 	} bulk;
 
-	struct urb *urb[UVC_URBS];
-	char *urb_buffer[UVC_URBS];
-	dma_addr_t urb_dma[UVC_URBS];
+	struct uvc_urb uvc_urb[UVC_URBS];
 	unsigned int urb_size;
 
 	__u32 sequence;
@@ -506,6 +545,11 @@
 	UVC_DEV_DISCONNECTED = 1,
 };
 
+#define for_each_uvc_urb(uvc_urb, uvc_streaming) \
+	for (uvc_urb = &uvc_streaming->uvc_urb[0]; \
+	     uvc_urb < &uvc_streaming->uvc_urb[UVC_URBS]; \
+	     ++uvc_urb)
+
 struct uvc_device {
 	struct usb_device *udev;
 	struct usb_interface *intf;
@@ -631,6 +675,9 @@
 extern void uvc_queue_cancel(struct uvc_video_queue *queue, int disconnect);
 extern struct uvc_buffer *uvc_queue_next_buffer(struct uvc_video_queue *queue,
 		struct uvc_buffer *buf);
+extern struct uvc_buffer *uvc_queue_get_current_buffer(
+		struct uvc_video_queue *queue);
+extern void uvc_queue_buffer_release(struct uvc_buffer *buf);
 extern int uvc_queue_mmap(struct uvc_video_queue *queue,
 		struct vm_area_struct *vma);
 extern unsigned int uvc_queue_poll(struct uvc_video_queue *queue,
@@ -718,8 +765,7 @@
 		struct usb_host_interface *alts, __u8 epaddr);
 
 /* Quirks support */
-void uvc_video_decode_isight(struct urb *urb, struct uvc_streaming *stream,
-		struct uvc_buffer *buf);
+void uvc_video_decode_isight(struct uvc_urb *urb, struct uvc_buffer *buf);
 
 /* debugfs and statistics */
 int uvc_debugfs_init(void);
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index d9e7892..f866b4a 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -1443,6 +1443,7 @@
 {
 	struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);
 	if (pdata) {
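+		/* stop the set_multicast work before pdata is freed */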
+		cancel_work_sync(&pdata->set_multicast);
 		netif_dbg(dev, ifdown, dev->net, "free pdata\n");
 		kfree(pdata);
 		pdata = NULL;
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 88c3fd3..8bf840b 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -51,6 +51,9 @@
 	{ USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
 	{ USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT },
 
+	/* Logitech Screen Share */
+	{ USB_DEVICE(0x046d, 0x086c), .driver_info = USB_QUIRK_DELAY_INIT },
+
 	/* Logitech Quickcam Fusion */
 	{ USB_DEVICE(0x046d, 0x08c1), .driver_info = USB_QUIRK_RESET_RESUME },
 
diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c
index d97e3a4..524061f 100644
--- a/drivers/usb/dwc2/hcd_intr.c
+++ b/drivers/usb/dwc2/hcd_intr.c
@@ -982,6 +982,8 @@
 	int pipe_type;
 	int urb_xfer_done;
 
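+	/* Restart NAK accounting now that the transfer has completed. */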
+	qtd->num_naks = 0;
+
 	if (dbg_hc(chan))
 		dev_vdbg(hsotg->dev,
 			 "--Host Channel %d Interrupt: Transfer Complete--\n",
@@ -1218,7 +1220,10 @@
 	 * avoid interrupt storms we'll wait before retrying if we've got
 	 * several NAKs. If we didn't do this we'd retry directly from the
 	 * interrupt handler and could end up quickly getting another
-	 * interrupt (another NAK), which we'd retry.
+	 * interrupt (another NAK), which we'd retry. Note that we do not
+	 * delay retries for IN parts of control requests, as those are expected
+	 * to complete fairly quickly, and if we delay them we risk confusing
+	 * the device and causing it to issue a STALL.
 	 *
 	 * Note that in DMA mode software only gets involved to re-send NAKed
 	 * transfers for split transactions, so we only need to apply this
@@ -1231,7 +1236,9 @@
 			qtd->error_count = 0;
 		qtd->complete_split = 0;
 		qtd->num_naks++;
-		qtd->qh->want_wait = qtd->num_naks >= DWC2_NAKS_BEFORE_DELAY;
+		qtd->qh->want_wait = qtd->num_naks >= DWC2_NAKS_BEFORE_DELAY &&
+				!(chan->ep_type == USB_ENDPOINT_XFER_CONTROL &&
+				  chan->ep_is_in);
 		dwc2_halt_channel(hsotg, chan, qtd, DWC2_HC_XFER_NAK);
 		goto handle_nak_done;
 	}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 08b11b3..9461f3d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1442,11 +1442,6 @@
 static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 {
 	return ino == EXT4_ROOT_INO ||
-		ino == EXT4_USR_QUOTA_INO ||
-		ino == EXT4_GRP_QUOTA_INO ||
-		ino == EXT4_BOOT_LOADER_INO ||
-		ino == EXT4_JOURNAL_INO ||
-		ino == EXT4_RESIZE_INO ||
 		(ino >= EXT4_FIRST_INO(sb) &&
 		 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
@@ -1749,7 +1744,6 @@
 					 EXT4_FEATURE_INCOMPAT_64BIT| \
 					 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
 					 EXT4_FEATURE_INCOMPAT_MMP | \
-					 EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
 					 EXT4_FEATURE_INCOMPAT_ENCRYPT | \
 					 EXT4_FEATURE_INCOMPAT_CSUM_SEED)
 #define EXT4_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 3c93815..2d8e737 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -103,6 +103,7 @@
 };
 
 #define EXT4_EXT_MAGIC		cpu_to_le16(0xf30a)
+#define EXT4_MAX_EXTENT_DEPTH 5
 
 #define EXT4_EXTENT_TAIL_OFFSET(hdr) \
 	(sizeof(struct ext4_extent_header) + \
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index fbf8849..98965d4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -875,6 +875,12 @@
 
 	eh = ext_inode_hdr(inode);
 	depth = ext_depth(inode);
+	if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) {
+		EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d",
+				 depth);
+		ret = -EFSCORRUPTED;
+		goto err;
+	}
 
 	if (path) {
 		ext4_ext_drop_refs(path);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index ecc2fc1..8f442b9 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -434,6 +434,7 @@
 
 	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
 		0, EXT4_MIN_INLINE_DATA_SIZE);
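+	/* Clear the in-memory copy of i_block as well. */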
+	memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE);
 
 	if (ext4_has_feature_extents(inode->i_sb)) {
 		if (S_ISDIR(inode->i_mode) ||
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 450623b..6bafe7c7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3646,6 +3646,7 @@
 
 int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 {
+#if 0
 	struct super_block *sb = inode->i_sb;
 	ext4_lblk_t first_block, stop_block;
 	struct address_space *mapping = inode->i_mapping;
@@ -3776,6 +3777,12 @@
 out_mutex:
 	mutex_unlock(&inode->i_mutex);
 	return ret;
+#else
+	/*
+	 * Disabled as per b/28760453
+	 */
+	return -EOPNOTSUPP;
+#endif
 }
 
 int ext4_inode_attach_jinode(struct inode *inode)
@@ -3943,7 +3950,8 @@
 	int			inodes_per_block, inode_offset;
 
 	iloc->bh = NULL;
-	if (!ext4_valid_inum(sb, inode->i_ino))
+	if (inode->i_ino < EXT4_ROOT_INO ||
+	    inode->i_ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
 		return -EFSCORRUPTED;
 
 	iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8146975..db3b69c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3430,6 +3430,11 @@
 	} else {
 		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
 		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
+		if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
+			ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
+				 sbi->s_first_ino);
+			goto failed_mount;
+		}
 		if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
 		    (!is_power_of_2(sbi->s_inode_size)) ||
 		    (sbi->s_inode_size > blocksize)) {
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index bdec665..0fabca5 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -73,13 +73,13 @@
 				cpu_to_be32(HFSP_HARDLINK_TYPE) &&
 				entry.file.user_info.fdCreator ==
 				cpu_to_be32(HFSP_HFSPLUS_CREATOR) &&
+				HFSPLUS_SB(sb)->hidden_dir &&
 				(entry.file.create_date ==
 					HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->
 						create_date ||
 				entry.file.create_date ==
 					HFSPLUS_I(sb->s_root->d_inode)->
-						create_date) &&
-				HFSPLUS_SB(sb)->hidden_dir) {
+						create_date)) {
 			struct qstr str;
 			char name[32];
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b971fae..1ea0514 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2689,8 +2689,8 @@
 	ONE("cgroup",  S_IRUGO, proc_cgroup_show),
 #endif
 	INF("oom_score",  S_IRUGO, proc_oom_score),
-	REG("oom_adj",    S_IRUSR, proc_oom_adj_operations),
-	REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations),
+	REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
+	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
 #ifdef CONFIG_AUDITSYSCALL
 	REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
 	REG("sessionid",  S_IRUGO, proc_sessionid_operations),
@@ -3028,8 +3028,8 @@
 	ONE("cgroup",  S_IRUGO, proc_cgroup_show),
 #endif
 	INF("oom_score", S_IRUGO, proc_oom_score),
-	REG("oom_adj",   S_IRUSR, proc_oom_adj_operations),
-	REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations),
+	REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
+	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
 #ifdef CONFIG_AUDITSYSCALL
 	REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
 	REG("sessionid",  S_IRUGO, proc_sessionid_operations),
diff --git a/include/linux/low-mem-notify.h b/include/linux/low-mem-notify.h
index 0dcfc62..62787c5 100644
--- a/include/linux/low-mem-notify.h
+++ b/include/linux/low-mem-notify.h
@@ -2,6 +2,7 @@
 #define _LINUX_LOW_MEM_NOTIFY_H
 
 #include <linux/mm.h>
+#include <linux/ratelimit.h>
 #include <linux/stddef.h>
 #include <linux/swap.h>
 
@@ -10,55 +11,58 @@
 void low_mem_notify(void);
 extern const struct file_operations low_mem_notify_fops;
 extern bool low_mem_margin_enabled;
-extern unsigned long low_mem_lowest_seen_anon_mem;
-extern const unsigned long low_mem_anon_mem_delta;
 extern unsigned int low_mem_ram_vs_swap_weight;
+extern struct ratelimit_state low_mem_logging_ratelimit;
 
 /*
  * Compute available memory used by files that can be reclaimed quickly.
  */
-static inline unsigned long get_available_file_mem(int lru_base)
+static inline unsigned long get_available_file_mem(void)
 {
 	unsigned long file_mem =
-			global_page_state(lru_base + LRU_ACTIVE_FILE) +
-			global_page_state(lru_base + LRU_INACTIVE_FILE);
+			global_page_state(NR_ACTIVE_FILE) +
+			global_page_state(NR_INACTIVE_FILE);
 	unsigned long dirty_mem = global_page_state(NR_FILE_DIRTY);
 	unsigned long min_file_mem = min_filelist_kbytes >> (PAGE_SHIFT - 10);
 	unsigned long clean_file_mem = file_mem - dirty_mem;
 	/* Conservatively estimate the amount of available_file_mem */
 	unsigned long available_file_mem = (clean_file_mem > min_file_mem) ?
-	    (clean_file_mem - min_file_mem) : 0;
+			(clean_file_mem - min_file_mem) : 0;
 	return available_file_mem;
 }
 
 /*
+ * Available anonymous memory.
+ */
+static inline unsigned long get_available_anon_mem(void)
+{
+	return global_page_state(NR_ACTIVE_ANON) +
+		global_page_state(NR_INACTIVE_ANON);
+}
+
+/*
  * Compute "available" memory, that is either free memory or memory that can be
  * reclaimed quickly, adjusted for the presence of swap.
  */
-static inline unsigned long get_available_mem_adj(int lru_base)
+static inline unsigned long get_available_mem_adj(void)
 {
-	/* min_free_kbytes is reserved for emergency allocation like when
-	 * PF_MEMALLOC is set. In general it's not usable in normal page
-	 * allocation process.
-	 */
-	unsigned long min_free_pages = min_free_kbytes >> (PAGE_SHIFT - 10);
 	/* free_mem is completely unallocated; clean file-backed memory
 	 * (file_mem - dirty_mem) is easy to reclaim, except for the last
-	 * min_filelist_kbytes.
+	 * min_filelist_kbytes. totalreserve_pages is the reserve of pages that
+	 * are not available to user space.
 	 */
-	unsigned long free_mem =
-			global_page_state(NR_FREE_PAGES) - min_free_pages;
+	unsigned long raw_free_mem = global_page_state(NR_FREE_PAGES);
+	unsigned long free_mem = (raw_free_mem > totalreserve_pages) ?
+			raw_free_mem - totalreserve_pages : 0;
 	unsigned long available_mem = free_mem +
-	    get_available_file_mem(lru_base);
-	long _nr_swap_pages = get_nr_swap_pages();
+			get_available_file_mem();
+	unsigned long swappable_pages = min_t(unsigned long,
+			get_nr_swap_pages(), get_available_anon_mem());
 	/*
 	 * The contribution of swap is reduced by a factor of
 	 * low_mem_ram_vs_swap_weight.
 	 */
-	unsigned long swap_adj = _nr_swap_pages;
-
-	do_div(swap_adj, low_mem_ram_vs_swap_weight);
-	return available_mem + swap_adj;
+	return available_mem + swappable_pages / low_mem_ram_vs_swap_weight;
 }
 
 /*
@@ -66,31 +70,20 @@
  */
 static inline bool _is_low_mem_situation(void)
 {
-	const int lru_base = NR_LRU_BASE - LRU_BASE;
 	static bool was_low_mem;	/* = false, as per style guide */
 	/* We declare a low-memory condition when a combination of RAM and swap
 	 * space is low.
 	 */
-	unsigned long available_mem = get_available_mem_adj(lru_base);
+	unsigned long available_mem = get_available_mem_adj();
 	bool is_low_mem = available_mem < low_mem_minfree;
 
-	if (unlikely(is_low_mem && !was_low_mem)) {
-		unsigned long anon_mem =
-			global_page_state(lru_base + LRU_ACTIVE_ANON) +
-			global_page_state(lru_base + LRU_INACTIVE_ANON);
-		if (unlikely(anon_mem < low_mem_lowest_seen_anon_mem)) {
-			printk(KERN_INFO "entering low_mem "
-			       "(avail RAM = %lu kB, avail swap %lu kB, "
-			       "avail file %lu kB) "
-			       "with lowest seen anon mem: %lu kB\n",
-			       available_mem * PAGE_SIZE / 1024,
-			       get_nr_swap_pages() * PAGE_SIZE / 1024,
-			       get_available_file_mem(lru_base) * PAGE_SIZE /
-				  1024,
-			       anon_mem * PAGE_SIZE / 1024);
-			low_mem_lowest_seen_anon_mem = anon_mem -
-				low_mem_anon_mem_delta;
-		}
+	if (unlikely(is_low_mem && !was_low_mem) &&
+	    __ratelimit(&low_mem_logging_ratelimit)) {
+		pr_info("entering low_mem (avail RAM = %lu kB, avail swap %lu kB, avail file %lu kB, anon mem: %lu kB)\n",
+			available_mem * PAGE_SIZE / 1024,
+			get_nr_swap_pages() * PAGE_SIZE / 1024,
+			get_available_file_mem() * PAGE_SIZE / 1024,
+			get_available_anon_mem() * PAGE_SIZE / 1024);
 	}
 	was_low_mem = is_low_mem;
 
diff --git a/include/linux/security.h b/include/linux/security.h
index 8acadb9..6fe292e 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -3236,6 +3236,14 @@
 int chromiumos_security_inode_follow_link(struct dentry *dentry,
 					  struct nameidata *nd);
 int chromiumos_security_file_open(struct file *file, const struct cred *cred);
+int chromiumos_security_capable(const struct cred *cred,
+				struct user_namespace *ns,
+				int cap);
+int chromiumos_security_task_fix_setuid(struct cred *new,
+					const struct cred *old,
+					int flags);
+int chromiumos_sb_copy_data(char *orig, char *copy);
+int chromiumos_sb_kern_mount(struct super_block *sb, int flags, void *data);
 #else
 static inline
 int chromiumos_security_sb_mount(const char *dev_name, struct path *path,
@@ -3271,6 +3279,24 @@
 {
 	return 0;
 }
+static inline
+int chromiumos_security_capable(const struct cred *cred,
+				struct user_namespace *ns,
+				int cap)
+{
+	return 0;
+}
+static inline
+int chromiumos_sb_copy_data(char *orig, char *copy)
+{
+	return 0;
+}
+static inline
+int chromiumos_security_task_fix_setuid(struct cred *new,
+					const struct cred *old,
+					int flags)
+{
+	return 0;
+}
+static inline
+int chromiumos_sb_kern_mount(struct super_block *sb, int flags, void *data)
+{
+	return 0;
+}
 #endif /* CONFIG_SECURITY_CHROMIUMOS */
 
 #endif /* ! __LINUX_SECURITY_H */
diff --git a/include/linux/string.h b/include/linux/string.h
index f9aa40b..70f5435 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -10,6 +10,7 @@
 
 extern char *strndup_user(const char __user *, long);
 extern void *memdup_user(const void __user *, size_t);
+extern void *memdup_user_nul(const void __user *, size_t);
 
 /*
  * Include machine specific inline routines
@@ -118,6 +119,7 @@
 extern const char *kstrdup_const(const char *s, gfp_t gfp);
 extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
 extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
+extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp);
 
 extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
 extern void argv_free(char **argv);
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 8a96df1..4888273 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1973,6 +1973,9 @@
 	__u8     incoming;
 } __packed;
 
+/* vendor events */
+#define HCI_EV_VENDOR           0xff
+
 /* ---- HCI Packet structures ---- */
 #define HCI_COMMAND_HDR_SIZE 3
 #define HCI_EVENT_HDR_SIZE   2
diff --git a/include/net/bluetooth/hci_le_splitter.h b/include/net/bluetooth/hci_le_splitter.h
index 09e1169..22bef00 100644
--- a/include/net/bluetooth/hci_le_splitter.h
+++ b/include/net/bluetooth/hci_le_splitter.h
@@ -33,6 +33,7 @@
 
 #ifdef CONFIG_BT_HCI_LE_SPLITTER
 
+int hci_le_splitter_sysfs_init(void);
 void hci_le_splitter_init_start(struct hci_dev *hdev);
 int hci_le_splitter_init_done(struct hci_dev *hdev);
 void hci_le_splitter_init_fail(struct hci_dev *hdev);
@@ -77,6 +78,11 @@
 
 }
 
+static inline int hci_le_splitter_sysfs_init(void)
+{
+	return 0;
+}
+
 #endif
 
 #endif /* __HCI_LE_SPLITTER_H */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index d9a054f..ff48aa83 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -328,8 +328,8 @@
 }
 #endif
 
-#define IPV6_FRAG_HIGH_THRESH	(4 * 1024*1024)	/* 4194304 */
-#define IPV6_FRAG_LOW_THRESH	(3 * 1024*1024)	/* 3145728 */
+#define IPV6_FRAG_HIGH_THRESH	(256 * 1024)	/* 262144 */
+#define IPV6_FRAG_LOW_THRESH	(192 * 1024)	/* 196608 */
 #define IPV6_FRAG_TIMEOUT	(60 * HZ)	/* 60 seconds */
 
 int __ipv6_addr_type(const struct in6_addr *addr);
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 30db069..b1f67b6 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -623,7 +623,10 @@
 #define PCI_EXT_CAP_ID_SECPCI	0x19	/* Secondary PCIe Capability */
 #define PCI_EXT_CAP_ID_PMUX	0x1A	/* Protocol Multiplexing */
 #define PCI_EXT_CAP_ID_PASID	0x1B	/* Process Address Space ID */
-#define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_PASID
+#define PCI_EXT_CAP_ID_DPC	0x1D	/* Downstream Port Containment */
+#define PCI_EXT_CAP_ID_L1SS	0x1E	/* L1 PM Substates */
+#define PCI_EXT_CAP_ID_PTM	0x1F	/* Precision Time Measurement */
+#define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_PTM
 
 #define PCI_EXT_CAP_DSN_SIZEOF	12
 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
@@ -899,4 +902,19 @@
 #define PCI_TPH_CAP_ST_SHIFT	16	/* st table shift */
 #define PCI_TPH_BASE_SIZEOF	12	/* size with no st table */
 
+/* L1 PM Substates */
+#define PCI_L1SS_CAP		    4	/* capability register */
+#define  PCI_L1SS_CAP_PCIPM_L1_2	 1	/* PCI PM L1.2 Support */
+#define  PCI_L1SS_CAP_PCIPM_L1_1	 2	/* PCI PM L1.1 Support */
+#define  PCI_L1SS_CAP_ASPM_L1_2		 4	/* ASPM L1.2 Support */
+#define  PCI_L1SS_CAP_ASPM_L1_1		 8	/* ASPM L1.1 Support */
+#define  PCI_L1SS_CAP_L1_PM_SS		16	/* L1 PM Substates Support */
+#define PCI_L1SS_CTL1		    8	/* Control Register 1 */
+#define  PCI_L1SS_CTL1_PCIPM_L1_2	1	/* PCI PM L1.2 Enable */
+#define  PCI_L1SS_CTL1_PCIPM_L1_1	2	/* PCI PM L1.1 Enable */
+#define  PCI_L1SS_CTL1_ASPM_L1_2	4	/* ASPM L1.2 Enable */
+#define  PCI_L1SS_CTL1_ASPM_L1_1	8	/* ASPM L1.1 Enable */
+#define  PCI_L1SS_CTL1_L1SS_MASK	0x0000000F
+#define PCI_L1SS_CTL2		    0xC	/* Control Register 2 */
+
 #endif /* LINUX_PCI_REGS_H */
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 8168394..5db9346 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -571,7 +571,8 @@
 #define V4L2_CID_MPEG_VIDEO_VPX_P_FRAME_QP		(V4L2_CID_MPEG_BASE+510)
 #define V4L2_CID_MPEG_VIDEO_VPX_PROFILE			(V4L2_CID_MPEG_BASE+511)
 
-#define V4L2_CID_MPEG_VIDEO_VP8_FRAME_HDR		(V4L2_CID_MPEG_BASE+512)
+/* Control ID not existing in upstream */
+#define V4L2_CID_MPEG_VIDEO_VP8_FRAME_HDR		(V4L2_CID_MPEG_BASE+590)
 
 /*  MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */
 #define V4L2_CID_MPEG_CX2341X_BASE 				(V4L2_CTRL_CLASS_MPEG | 0x1000)
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 5e1a8c8..891e127 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1338,7 +1338,7 @@
 #define V4L2_CTRL_FLAG_VOLATILE		0x0080
 #define V4L2_CTRL_FLAG_HAS_PAYLOAD	0x0100
 #define V4L2_CTRL_FLAG_EXECUTE_ON_WRITE	0x0200
-#define V4L2_CTRL_FLAG_CAN_STORE	0x0400
+#define V4L2_CTRL_FLAG_CAN_STORE	0x8000
 
 /*  Query flags, to be ORed with the control ID */
 #define V4L2_CTRL_FLAG_NEXT_CTRL	0x80000000
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8b47609..bb5045d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1476,7 +1476,7 @@
 	spinlock_t *old_ptl, *new_ptl;
 	int ret = 0;
 	pmd_t pmd;
-
+	bool force_flush = false;
 	struct mm_struct *mm = vma->vm_mm;
 
 	if ((old_addr & ~HPAGE_PMD_MASK) ||
@@ -1504,6 +1504,8 @@
 		if (new_ptl != old_ptl)
 			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 		pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
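+		/* A present PMD needs a TLB flush before the PTLs are dropped. */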
+		if (pmd_present(pmd))
+			force_flush = true;
 		VM_BUG_ON(!pmd_none(*new_pmd));
 
 		if (pmd_move_must_withdraw(new_ptl, old_ptl)) {
@@ -1512,6 +1514,8 @@
 			pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
 		}
 		set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
+		if (force_flush)
+			flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
 		if (new_ptl != old_ptl)
 			spin_unlock(new_ptl);
 		spin_unlock(old_ptl);
diff --git a/mm/low-mem-notify.c b/mm/low-mem-notify.c
index e9047e1..675ea37 100644
--- a/mm/low-mem-notify.c
+++ b/mm/low-mem-notify.c
@@ -37,13 +37,8 @@
 unsigned long low_mem_minfree;
 unsigned int low_mem_ram_vs_swap_weight = 4;
 
-/*
- * We're interested in worst-case anon memory usage when the low-memory
- * notification fires.  To contain logging, we limit our interest to
- * non-trivial steps.
- */
-unsigned long low_mem_lowest_seen_anon_mem;
-const unsigned long low_mem_anon_mem_delta = 10 * 1024 * 1024 / PAGE_SIZE;
+/* Limit logging low memory to once per second. */
+DEFINE_RATELIMIT_STATE(low_mem_logging_ratelimit, 1 * HZ, 1);
 
 struct low_mem_notify_file_info {
 	unsigned long unused;
@@ -189,8 +184,7 @@
 				      struct kobj_attribute *attr,
 				      char *buf)
 {
-	const int lru_base = NR_LRU_BASE - LRU_BASE;
-	unsigned long available_mem = get_available_mem_adj(lru_base);
+	unsigned long available_mem = get_available_mem_adj();
 
 	return sprintf(buf, "%lu\n",
 		       available_mem / (1024 * 1024 / PAGE_SIZE));
@@ -222,7 +216,6 @@
 	if (err)
 		pr_err("low_mem: register sysfs failed\n");
 	low_mem_minfree = low_mem_margin_to_minfree(low_mem_margin_mb);
-	low_mem_lowest_seen_anon_mem = totalram_pages;
 	return err;
 }
 module_init(low_mem_init)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index bd08c9b..1c689dc3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -988,11 +988,6 @@
 		*policy |= (pol->flags & MPOL_MODE_FLAGS);
 	}
 
-	if (vma) {
-		up_read(&current->mm->mmap_sem);
-		vma = NULL;
-	}
-
 	err = 0;
 	if (nmask) {
 		if (mpol_store_user_nodemask(pol)) {
diff --git a/mm/mlock.c b/mm/mlock.c
index 861c012..03f89c34 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -90,6 +90,7 @@
 			putback_lru_page(page);
 	}
 }
+EXPORT_SYMBOL_GPL(mlock_vma_page);
 
 /*
  * Isolate a page from LRU with optional get_page() pin.
@@ -204,6 +205,7 @@
 out:
 	return nr_pages - 1;
 }
+EXPORT_SYMBOL_GPL(munlock_vma_page);
 
 /**
  * __mlock_vma_pages_range() -  mlock a range of pages in the vma.
diff --git a/mm/mremap.c b/mm/mremap.c
index 0843feb..f59e920 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -97,6 +97,8 @@
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *old_pte, *new_pte, pte;
 	spinlock_t *old_ptl, *new_ptl;
+	bool force_flush = false;
+	unsigned long len = old_end - old_addr;
 
 	/*
 	 * When need_rmap_locks is true, we take the i_mmap_mutex and anon_vma
@@ -143,12 +145,26 @@
 		if (pte_none(*old_pte))
 			continue;
 		pte = ptep_get_and_clear(mm, old_addr, old_pte);
+		/*
+		 * If we are remapping a valid PTE, make sure
+		 * to flush TLB before we drop the PTL for the PTE.
+		 *
+		 * NOTE! Both old and new PTL matter: the old one
+		 * for racing with page_mkclean(), the new one to
+		 * make sure the physical page stays valid until
+		 * the TLB entry for the old mapping has been
+		 * flushed.
+		 */
+		if (pte_present(pte))
+			force_flush = true;
 		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
 		pte = move_soft_dirty_pte(pte);
 		set_pte_at(mm, new_addr, new_pte, pte);
 	}
 
 	arch_leave_lazy_mmu_mode();
+	if (force_flush)
+		flush_tlb_range(vma, old_end - len, old_end);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	pte_unmap(new_pte - 1);
@@ -168,7 +184,6 @@
 {
 	unsigned long extent, next, old_end;
 	pmd_t *old_pmd, *new_pmd;
-	bool need_flush = false;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 
@@ -199,7 +214,6 @@
 						    new_addr, old_end,
 						    old_pmd, new_pmd);
 			if (err > 0) {
-				need_flush = true;
 				continue;
 			} else if (!err) {
 				split_huge_page_pmd(vma, old_addr, old_pmd);
@@ -216,10 +230,7 @@
 			extent = LATENCY_LIMIT;
 		move_ptes(vma, old_pmd, old_addr, old_addr + extent,
 			  new_vma, new_pmd, new_addr, need_rmap_locks);
-		need_flush = true;
 	}
-	if (likely(need_flush))
-		flush_tlb_range(vma, old_end-len, old_addr);
 
 	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 
diff --git a/mm/util.c b/mm/util.c
index d641fcb..afca558 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -81,6 +81,8 @@
  * @s: the string to duplicate
  * @max: read at most @max chars from @s
  * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ *
+ * Note: Use kmemdup_nul() instead if the size is known exactly.
  */
 char *kstrndup(const char *s, size_t max, gfp_t gfp)
 {
@@ -119,6 +121,28 @@
 EXPORT_SYMBOL(kmemdup);
 
 /**
+ * kmemdup_nul - Create a NUL-terminated string from unterminated data
+ * @s: The data to stringify
+ * @len: The size of the data
+ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ */
+char *kmemdup_nul(const char *s, size_t len, gfp_t gfp)
+{
+	char *buf;
+
+	if (!s)
+		return NULL;
+
+	buf = kmalloc_track_caller(len + 1, gfp);
+	if (buf) {
+		memcpy(buf, s, len);
+		buf[len] = '\0';
+	}
+	return buf;
+}
+EXPORT_SYMBOL(kmemdup_nul);
+
+/**
  * memdup_user - duplicate memory region from user space
  *
  * @src: source address in user space
@@ -268,6 +292,37 @@
 }
 EXPORT_SYMBOL(strndup_user);
 
+/**
+ * memdup_user_nul - duplicate memory region from user space and NUL-terminate
+ *
+ * @src: source address in user space
+ * @len: number of bytes to copy
+ *
+ * Returns an ERR_PTR() on failure.
+ */
+void *memdup_user_nul(const void __user *src, size_t len)
+{
+	char *p;
+
+	/*
+	 * Always use GFP_KERNEL, since copy_from_user() can sleep and
+	 * cause pagefault, which makes it pointless to use GFP_NOFS
+	 * or GFP_ATOMIC.
+	 */
+	p = kmalloc_track_caller(len + 1, GFP_KERNEL);
+	if (!p)
+		return ERR_PTR(-ENOMEM);
+
+	if (copy_from_user(p, src, len)) {
+		kfree(p);
+		return ERR_PTR(-EFAULT);
+	}
+	p[len] = '\0';
+
+	return p;
+}
+EXPORT_SYMBOL(memdup_user_nul);
+
 void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct vm_area_struct *prev, struct rb_node *rb_parent)
 {
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 8f9c5e9..a5b1302 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -30,6 +30,7 @@
 #include <asm/ioctls.h>
 
 #include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_le_splitter.h>
 #include <linux/proc_fs.h>
 
 #include "leds.h"
@@ -774,6 +775,10 @@
 
 	BT_INFO("HCI device and connection manager initialized");
 
+	err = hci_le_splitter_sysfs_init();
+	if (err < 0)
+		goto error;
+
 	err = hci_sock_init();
 	if (err < 0)
 		goto error;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 3b8df19..a3bd9c6 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -4196,10 +4196,8 @@
 			continue;
 		}
 
-		if (!hci_le_splitter_should_allow_bluez_rx(hdev, skb)) {
-			kfree_skb(skb);
+		if (!hci_le_splitter_should_allow_bluez_rx(hdev, skb))
 			continue;
-		}
 
 		if (test_bit(HCI_INIT, &hdev->flags)) {
 			/* Don't process data packets in this states. */
diff --git a/net/bluetooth/hci_le_splitter.c b/net/bluetooth/hci_le_splitter.c
index 8c1ab01..330e226 100644
--- a/net/bluetooth/hci_le_splitter.c
+++ b/net/bluetooth/hci_le_splitter.c
@@ -4,7 +4,10 @@
 #include <net/bluetooth/hci_le_splitter.h>
 #include <linux/miscdevice.h>
 #include <linux/semaphore.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
 #include <asm/atomic.h>
+#include <asm/ioctls.h>
 
 
 /* RXed bytes we'll queue before giving up on userspace. picked arbitrarily */
@@ -20,6 +23,13 @@
 	size_t tx_in_flight;
 };
 
+enum {
+	SPLITTER_STATE_NOT_SET,
+	SPLITTER_STATE_DISABLED,
+	SPLITTER_STATE_ENABLED,
+};
+
+
 /* This mutex protects the below (essentially splitter internal state) */
 static DEFINE_MUTEX(hci_state_lock);
 static struct hci_dev *cur_dev;
@@ -28,6 +38,7 @@
 static struct hci_le_splitter_le_conn tracked_conns[HCI_LE_SPLIT_MAX_LE_CONNS];
 static u32 max_pkts_in_flight;
 static u32 cur_pkts_in_flight;
+static u8 splitter_enable_state = SPLITTER_STATE_NOT_SET;
 
 /* protects command sequencing */
 static DEFINE_SEMAPHORE(cmd_sem);
@@ -38,9 +49,6 @@
 /* "is chip in state to talk to second stack?" */
 static atomic_t chip_ready_for_second_stack = ATOMIC_INIT(0);
 
-/* "is one-time init done?" */
-static atomic_t one_time_init_done = ATOMIC_INIT(0);
-
 /* protects messages waiting to be read */
 static DEFINE_MUTEX(usr_msg_q_lock);
 static DECLARE_WAIT_QUEUE_HEAD(usr_msg_wait_q);
@@ -76,6 +84,7 @@
 
 static void hci_le_splitter_usr_queue_reset_message(bool allow_commands);
 static struct hci_le_splitter_le_conn *cid_find_le_conn(u16 cid);
+static struct device_attribute sysfs_attr;
 static struct miscdevice mdev;
 
 
@@ -135,18 +144,12 @@
 	else
 		pr_info("HCI splitter ignoring dev\n");
 
-	if (!atomic_cmpxchg(&one_time_init_done, 0, 1)) {
-
-		skb_queue_head_init(&usr_msg_q);
-		misc_register(&mdev);
-	}
-
 	mutex_unlock(&hci_state_lock);
 }
 
 int hci_le_splitter_init_done(struct hci_dev *hdev)
 {
-	//nothing to do for now
+	/* nothing to do for now */
 
 	return 0;
 }
@@ -155,6 +158,7 @@
 				    size_t bytes, loff_t *off)
 {
 	struct sk_buff *skb;
+	u8 packet_typ;
 	ssize_t ret;
 
 
@@ -179,17 +183,19 @@
 
 	} while (!skb);
 
-	if (bytes > skb->len)
-		bytes = skb->len;
+	/* one byte for hci packet type */
+	packet_typ = hci_skb_pkt_type(skb);
 
-	if (skb->len > bytes) {
-		ret = -ETOOSMALL;
-	} else if (copy_to_user(userbuf, skb->data, bytes)) {
+	if (skb->len + sizeof(packet_typ) > bytes) {
+		ret = -ENOMEM;
+	} else if (put_user(packet_typ, userbuf) ||
+			copy_to_user(userbuf + sizeof(packet_typ),
+							skb->data, skb->len)) {
 		ret = -EFAULT;
 	} else {
-		usr_msg_q_len -= bytes;
+		usr_msg_q_len -= skb->len;
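+		/* report the packet-type byte plus the payload */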
+		ret = (ssize_t)skb->len + 1;
 		kfree_skb(skb);
-		ret = (ssize_t)bytes;
 	}
 
 	if (ret < 0)
@@ -208,6 +214,7 @@
 {
 	struct hci_acl_hdr acl_hdr;
 	struct sk_buff *skb;
+	u16 cmd_val = 0;
 	u8 pkt_typ;
 
 	if (bytes < 1)
@@ -247,6 +254,8 @@
 		if (bytes - cmd_hdr.plen - HCI_COMMAND_HDR_SIZE)
 			return -EINVAL;
 
+		cmd_val = __le16_to_cpu(cmd_hdr.opcode);
+
 	} else {
 		return -EINVAL;
 	}
@@ -256,7 +265,7 @@
 		return -ENOMEM;
 
 	hci_skb_pkt_type(skb) = pkt_typ;
-	if (copy_from_user(skb->data, userbuf, bytes)) {
+	if (copy_from_user(skb_put(skb, bytes), userbuf, bytes)) {
 		kfree_skb(skb);
 		return -EFAULT;
 	}
@@ -293,6 +302,9 @@
 	/* perform the actual transmission */
 	__net_timestamp(skb);
 	mutex_lock(&hci_state_lock);
+	if (pkt_typ == HCI_COMMAND_PKT)
+		le_waiting_on_opcode = cmd_val;
+
 	hci_send_to_monitor(cur_dev, skb);
 	skb_orphan(skb);
 	if (cur_dev->send(cur_dev, skb) < 0) {
@@ -338,41 +350,99 @@
 
 	mutex_lock(&usr_msg_q_lock);
 	hci_le_splitter_usr_purge_rx_q();
-	hci_le_splitter_usr_queue_reset_message(atomic_read(&chip_ready_for_second_stack) != 0);
 	mutex_unlock(&usr_msg_q_lock);
+	hci_le_splitter_usr_queue_reset_message(atomic_read(&chip_ready_for_second_stack) != 0);
 
 	return ret;
 }
 
 static int hci_le_splitter_release(struct inode *inode, struct file *file)
 {
+	int32_t dev_id = -1;
+
 	mutex_lock(&usr_msg_q_lock);
 	hci_le_splitter_usr_purge_rx_q();
 
-	if (!atomic_cmpxchg(&chip_ready_for_second_stack, 1, 0)) {
-
+	if (atomic_cmpxchg(&usr_connected, 1, 0)) {
 		/* file close while chip was being used - we must reset it */
-		struct sk_buff *skb = bt_skb_alloc(HCI_COMMAND_HDR_SIZE, GFP_KERNEL);
-		if (skb) {
-			struct hci_command_hdr *cmd = (struct hci_command_hdr *)skb->data;
-
-			cmd->opcode = __cpu_to_le16(HCI_OP_RESET);
-			cmd->plen = 0;
-			hci_skb_pkt_type(skb) = HCI_COMMAND_PKT;
-
-			hci_send_to_monitor(cur_dev, skb);
-			skb_orphan(skb);
-			if (!cur_dev || !cur_dev->send || cur_dev->send(cur_dev, skb))
-				kfree_skb(skb);
-		}
+		if (cur_dev)
+			dev_id = cur_dev->id;
+		pr_info("reset queued\n");
 	}
 
-	atomic_set(&usr_connected, 0);
+	atomic_set(&chip_ready_for_second_stack, 0);
 	mutex_unlock(&usr_msg_q_lock);
 
+	if (dev_id >= 0) {
+		/* the result does not matter - bluetoothd must be restarted
+		 * to regain the ability to run anyway
+		 */
+		hci_dev_reset(dev_id);
+	}
+
 	return 0;
 }
 
+static long hci_le_splitter_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct sk_buff *skb;
+	int readable_sz;
+
+	switch (cmd) {
+	case FIONREAD:		/* if we had multiple readers, this would be bad */
+		mutex_lock(&usr_msg_q_lock);
+		skb = skb_peek(&usr_msg_q);
+		readable_sz = skb ? skb->len + 1 : 0;
+		mutex_unlock(&usr_msg_q_lock);
+		return put_user(readable_sz, (int __user *)arg);
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static ssize_t hci_le_splitter_sysfs_enabled_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	int ret = 0;
+
+	mutex_lock(&hci_state_lock);
+	switch (splitter_enable_state) {
+	case SPLITTER_STATE_NOT_SET:
+		ret = sprintf(buf, "%s\n", "NOT SET");
+		break;
+	case SPLITTER_STATE_DISABLED:
+		ret = sprintf(buf, "%s\n", "OFF");
+		break;
+	case SPLITTER_STATE_ENABLED:
+		ret = sprintf(buf, "%s\n", "ON");
+		break;
+	}
+	mutex_unlock(&hci_state_lock);
+
+	return ret;
+}
+
+static ssize_t hci_le_splitter_sysfs_enabled_store(struct device *dev,
+		struct device_attribute *attr, const char *buf,
+		size_t count)
+{
+	ssize_t ret = strlen(buf);
+	bool set;
+
+	if (strtobool(buf, &set) < 0)
+		return -EINVAL;
+
+	mutex_lock(&hci_state_lock);
+	if (splitter_enable_state == SPLITTER_STATE_NOT_SET)
+		splitter_enable_state =
+			set ? SPLITTER_STATE_ENABLED : SPLITTER_STATE_DISABLED;
+	else
+		ret = -EPERM;
+	mutex_unlock(&hci_state_lock);
+
+	return ret;
+}
+
 static const struct file_operations hci_le_splitter_fops = {
 
 	.read	        = hci_le_splitter_read,
@@ -380,6 +450,7 @@
 	.open	        = hci_le_splitter_open,
 	.poll	        = hci_le_splitter_poll,
 	.release	= hci_le_splitter_release,
+	.unlocked_ioctl	= hci_le_splitter_ioctl,
 };
 
 static struct miscdevice mdev = {
@@ -389,11 +460,18 @@
 	.fops  = &hci_le_splitter_fops
 };
 
+static struct device_attribute sysfs_attr =
+	__ATTR(le_splitter_enabled,           /* file name */
+	0660,                                 /* file permissions */
+	hci_le_splitter_sysfs_enabled_show,   /* file read fn */
+	hci_le_splitter_sysfs_enabled_store); /* file write fn */
+
+
 void hci_le_splitter_deinit(struct hci_dev *hdev)
 {
 	mutex_lock(&hci_state_lock);
-	mutex_lock(&usr_msg_q_lock);
 	if (hci_le_splitter_is_our_dev(hdev)) {
+		mutex_lock(&usr_msg_q_lock);
 		cur_dev = NULL;
 		pr_info("HCI splitter unregistered\n");
 
@@ -401,9 +479,9 @@
 		wake_up_interruptible(&usr_msg_wait_q);
 		wake_up_interruptible(&tx_has_room_wait_q);
 		atomic_set(&chip_ready_for_second_stack, 0);
+		mutex_unlock(&usr_msg_q_lock);
 		hci_le_splitter_usr_queue_reset_message(false);
 	}
-	mutex_unlock(&usr_msg_q_lock);
 	mutex_unlock(&hci_state_lock);
 }
 
@@ -416,11 +494,6 @@
 {
 	bool ret = true, skipsem = true;
 
-
-	pr_debug("**** tx type = %u, op=0x%04X count=%u\n", bt_cb(skb)->pkt_type,
-	     bt_cb(skb)->pkt_type == HCI_COMMAND_PKT ? hci_skb_opcode(skb) : 0,
-	     cmd_sem.count);
-
 	mutex_lock(&hci_state_lock);
 
 	if (hci_le_splitter_is_our_dev(hdev) &&
@@ -431,7 +504,15 @@
 
 		skipsem = false;
 
-		if (opcode == HCI_OP_RESET) {
+		if (splitter_enable_state == SPLITTER_STATE_NOT_SET) {
+			/* if state is not set, drop all packets */
+			pr_warn("LE splitter not initialized - chip TX denied!\n");
+			ret = false;
+		} else if (splitter_enable_state == SPLITTER_STATE_DISABLED) {
+			/* if disabled - allow all packets */
+			ret = true;
+			skipsem = true;
+		} else if (opcode == HCI_OP_RESET) {
 
 			static bool first = true;
 			if (!first)
@@ -472,7 +553,7 @@
 				pr_info("EDR stack unmasked some events unexpectedly - OK, just weird\n");
 			mask |= evtsLE;
 			*mask_loc = __cpu_to_le64(mask);
-			pr_debug("modified event mask 0x%016llX -> 0x%016llX\n",
+			pr_info("modified event mask 0x%016llX -> 0x%016llX\n",
 				(unsigned long long)oldmask,
 				(unsigned long long)mask);
 
@@ -510,6 +591,7 @@
 	return !!cid_find_le_conn(cid);
 }
 
+/* always takes ownership of skb */
 static void hci_le_splitter_enq_packet(struct sk_buff *skb)
 {
 	mutex_lock(&usr_msg_q_lock);
@@ -540,8 +622,8 @@
 	if (!skb)
 		return;
 
-	ev = (struct hci_event_hdr *)skb->data;
-	cc = (struct hci_ev_cmd_complete *)(ev + 1);
+	ev = (struct hci_event_hdr *)skb_put(skb, HCI_EVENT_HDR_SIZE);
+	cc = (struct hci_ev_cmd_complete *)skb_put(skb, sizeof(*cc));
 
 	hci_skb_pkt_type(skb) = HCI_EVENT_PKT;
 	ev->evt = HCI_EV_CMD_COMPLETE;
@@ -798,9 +880,9 @@
 						      plen, GFP_KERNEL);
 		if (le_evt) {	/* if this fails, you have bigger problems */
 
-			struct hci_event_hdr *new_hdr = (void *) le_evt->data;
+			struct hci_event_hdr *new_hdr =
+				(struct hci_event_hdr *)skb_put(le_evt, HCI_EVENT_HDR_SIZE);
 			struct hci_ev_num_comp_pkts *new_evt = (struct hci_ev_num_comp_pkts *)
-				(((u8 *) le_evt->data) + HCI_EVENT_HDR_SIZE);
+				skb_put(le_evt, sizeof(struct hci_ev_num_comp_pkts) + sizeof(struct hci_comp_pkts_info) * le_nonzero_conns);
 
 			hci_skb_pkt_type(le_evt) = HCI_EVENT_PKT;
 			new_hdr->evt = HCI_EV_NUM_COMP_PKTS;
@@ -846,15 +928,21 @@
 		evt->num_hndl = j;
 	}
 
-	/* if any LE packets got freed, signal user */
+	/* if any LE buffers got freed, signal user */
 	if (le_pkts)
 		wake_up_interruptible(&tx_has_room_wait_q);
 
-	/* if any EDR packets left in the event, it is not ours to claim */
+	/* if any EDR packets are left in the event, it is not ours to claim */
+	if (evt->num_hndl)
+		return false;
+
+	/* but if no EDR handles are left, the event is ours and will be
+	 * freed on our side
+	 */
 	return !evt->num_hndl;
 }
 
-/* called with lock held, return true to let bluez have the event */
+/*
+ * Called with the lock held; return true to let bluez have the event. If we
+ * return false, we must free the packet ourselves.
+ */
 static bool hci_le_splitter_should_allow_bluez_rx_evt(struct sk_buff *skb)
 {
 	struct hci_event_hdr *hdr = (void *) skb->data;
@@ -869,8 +957,6 @@
 	bool isours = false, enq_if_ours = true;
 	int len_chng = 0;
 
-	pr_debug("**** rx evt 0x%04X count=%u\n", hdr->evt, cmd_sem.count);
-
 	switch (hdr->evt) {
 	case HCI_EV_DISCONN_COMPLETE:
 		isours = hci_le_splitter_filter_disconn(disconn);
@@ -899,28 +985,40 @@
 	case HCI_EV_LE_META:
 		isours = hci_le_splitter_filter_le_meta(le_meta);
 		break;
+	case HCI_EV_VENDOR:
+		/* always ours */
+		isours = true;
+		break;
 	}
 
 	skb->len += len_chng;
 
 	if (isours && enq_if_ours)
 		hci_le_splitter_enq_packet(skb);
+	else if (isours) /* we still own it, so free it */
+		kfree_skb(skb);
 
 	return !isours;
 }
 
-/* return true to let bluez have the packet */
+/*
+ * Return true to let bluez have the packet. If we return false, we must
+ * free the packet ourselves.
+ */
 bool hci_le_splitter_should_allow_bluez_rx(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	u16 acl_handle, acl_flags;
 	bool ret = true;
 
-	pr_debug("**** rx %u count=%u\n", hci_skb_pkt_type(skb), cmd_sem.count);
-
 	mutex_lock(&hci_state_lock);
 	if (hci_le_splitter_is_our_dev(hdev)) {
 
-		switch (hci_skb_pkt_type(skb)) {
+		if (splitter_enable_state == SPLITTER_STATE_NOT_SET) {
+			/* if state is not set, drop all packets */
+			pr_warn("LE splitter not initialized - chip RX denied!\n");
+			kfree_skb(skb);
+			ret = false;
+		} else if (splitter_enable_state == SPLITTER_STATE_DISABLED) {
+			/* if disabled - allow all packets */
+			ret = true;
+		} else switch (hci_skb_pkt_type(skb)) {
 		case HCI_EVENT_PKT:
 			/* invalid (too small) packet? let bluez handle it */
 			if (HCI_EVENT_HDR_SIZE > skb->len)
@@ -960,3 +1058,19 @@
 	return ret;
 }
 
+int hci_le_splitter_sysfs_init(void)
+{
+	int err;
+
+	BT_INFO("Initializing LE splitter sysfs");
+	skb_queue_head_init(&usr_msg_q);
+	misc_register(&mdev);
+
+	err = device_create_file(mdev.this_device, &sysfs_attr);
+	if (err) {
+		pr_err("Cannot create sysfs file (%d) - off by default\n", err);
+		splitter_enable_state = SPLITTER_STATE_DISABLED;
+		return -1;
+	}
+	return 0;
+}
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 91d9957..89a7eef 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1988,13 +1988,6 @@
 	hci_dev_unlock(hdev);
 }
 
-static void disable_advertising(struct hci_request *req)
-{
-	u8 enable = 0x00;
-
-	hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
-}
-
 static int active_scan(struct hci_request *req, unsigned long opt)
 {
 	uint16_t interval = opt;
@@ -2006,24 +1999,6 @@
 
 	BT_DBG("%s", hdev->name);
 
-	if (hci_dev_test_flag(hdev, HCI_LE_ADV) || hci_dev_test_flag(hdev, HCI_LE_ADV_CHANGE_IN_PROGRESS)) {
-
-		hci_dev_lock(hdev);
-
-		/* Don't let discovery abort an outgoing connection attempt
-		 * that's using directed advertising.
-		 */
-		if (hci_lookup_le_connect(hdev)) {
-			hci_dev_unlock(hdev);
-			return -EBUSY;
-		}
-
-		cancel_adv_timeout(hdev);
-		hci_dev_unlock(hdev);
-
-		disable_advertising(req);
-	}
-
 	/* If controller is scanning, it means the background scanning is
 	 * running. Thus, we should temporarily stop it in order to set the
 	 * discovery scanning parameters.
diff --git a/net/core/sock.c b/net/core/sock.c
index e4a8e50..6df8823 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1530,6 +1530,8 @@
 
 		sock_copy(newsk, sk);
 
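+		/*
+		 * The clone uses sk->sk_prot, so make sure it is freed through
+		 * the same proto as well; sk_prot and sk_prot_creator can
+		 * differ (e.g. for AF_INET6 sockets downgraded to AF_INET).
+		 */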
+		newsk->sk_prot_creator = sk->sk_prot;
+
 		/* SANITY */
 		get_net(sock_net(newsk));
 		sk_node_init(&newsk->sk_node);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index c10a3ce..ebffeaf 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -822,22 +822,14 @@
 
 static int __net_init ipv4_frags_init_net(struct net *net)
 {
-	/* Fragment cache limits.
-	 *
-	 * The fragment memory accounting code, (tries to) account for
-	 * the real memory usage, by measuring both the size of frag
-	 * queue struct (inet_frag_queue (ipv4:ipq/ipv6:frag_queue))
-	 * and the SKB's truesize.
-	 *
-	 * A 64K fragment consumes 129736 bytes (44*2944)+200
-	 * (1500 truesize == 2944, sizeof(struct ipq) == 200)
-	 *
-	 * We will commit 4MB at one time. Should we cross that limit
-	 * we will prune down to 3MB, making room for approx 8 big 64K
-	 * fragments 8x128k.
+	/*
+	 * Fragment cache limits. We will commit 256K at one time. Should we
+	 * cross that limit we will prune down to 192K. This should cope with
+	 * even the most extreme cases without allowing an attacker to
+	 * measurably harm machine performance.
 	 */
-	net->ipv4.frags.high_thresh = 4 * 1024 * 1024;
-	net->ipv4.frags.low_thresh  = 3 * 1024 * 1024;
+	net->ipv4.frags.high_thresh = 256 * 1024;
+	net->ipv4.frags.low_thresh = 192 * 1024;
 	/*
 	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
 	 * RFC791 is wrong proposing to prolongate timer each fragment arrival
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3840ab2..73f02aa 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1093,7 +1093,7 @@
 	lock_sock(sk);
 
 	flags = msg->msg_flags;
-	if (flags & MSG_FASTOPEN) {
+	if ((flags & MSG_FASTOPEN) && !tp->repair) {
 		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
 		if (err == -EINPROGRESS && copied_syn > 0)
 			goto out;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bcf8eae..ad4f5e3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4599,6 +4599,7 @@
 static void tcp_collapse_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	u32 range_truesize, sum_tiny = 0;
 	struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
 	struct sk_buff *head;
 	u32 start, end;
@@ -4608,6 +4609,7 @@
 
 	start = TCP_SKB_CB(skb)->seq;
 	end = TCP_SKB_CB(skb)->end_seq;
+	range_truesize = skb->truesize;
 	head = skb;
 
 	for (;;) {
@@ -4622,14 +4624,24 @@
 		if (!skb ||
 		    after(TCP_SKB_CB(skb)->seq, end) ||
 		    before(TCP_SKB_CB(skb)->end_seq, start)) {
-			tcp_collapse(sk, &tp->out_of_order_queue,
-				     head, skb, start, end);
+			/* Do not attempt collapsing tiny skbs */
+			if (range_truesize != head->truesize ||
+			    end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+				tcp_collapse(sk, &tp->out_of_order_queue,
+					     head, skb, start, end);
+			} else {
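+				/* Track the total truesize of skipped tiny ranges; stop collapsing once they exceed 1/8 of the receive buffer */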
+				sum_tiny += range_truesize;
+				if (sum_tiny > sk->sk_rcvbuf >> 3)
+					return;
+			}
+
 			head = skb;
 			if (!skb)
 				break;
 			/* Start new segment */
 			start = TCP_SKB_CB(skb)->seq;
 			end = TCP_SKB_CB(skb)->end_seq;
+			range_truesize = skb->truesize;
 		} else {
 			if (before(TCP_SKB_CB(skb)->seq, start))
 				start = TCP_SKB_CB(skb)->seq;
@@ -4685,6 +4697,9 @@
 	else if (sk_under_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
+	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+		return 0;
+
 	tcp_collapse_ofo_queue(sk);
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		tcp_collapse(sk, &sk->sk_receive_queue,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 1cc3400..bc01fcc 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3239,7 +3239,7 @@
 		p += pol->sadb_x_policy_len*8;
 		sec_ctx = (struct sadb_x_sec_ctx *)p;
 		if (len < pol->sadb_x_policy_len*8 +
-		    sec_ctx->sadb_x_sec_len) {
+		    sec_ctx->sadb_x_sec_len*8) {
 			*dir = -EINVAL;
 			goto out;
 		}
diff --git a/net/mac80211-4.2/debug.h b/net/mac80211-4.2/debug.h
index 3c0f57f..1956b31 100644
--- a/net/mac80211-4.2/debug.h
+++ b/net/mac80211-4.2/debug.h
@@ -162,7 +162,7 @@
 		   sdata, fmt, ##__VA_ARGS__)
 
 #define mpath_dbg(sdata, fmt, ...)					\
-	_sdata_info(				\
+	_sdata_dbg(MAC80211_MPATH_DEBUG,				\
 		   sdata, fmt, ##__VA_ARGS__)
 
 #define mhwmp_dbg(sdata, fmt, ...)					\
diff --git a/net/mac80211-4.2/debugfs_netdev.c b/net/mac80211-4.2/debugfs_netdev.c
index 8680647..85812521e 100644
--- a/net/mac80211-4.2/debugfs_netdev.c
+++ b/net/mac80211-4.2/debugfs_netdev.c
@@ -488,6 +488,72 @@
 }
 IEEE80211_IF_FILE_RW(tsf);
 
+/* Update the interface RX limits for all connected STAs */
+static void update_rx_limit(struct ieee80211_sub_if_data *sdata,
+			    struct sta_info *sta)
+{
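+	/* Note: the sta argument is used only as the list_for_each_entry() cursor */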
+	list_for_each_entry(sta, &sdata->local->sta_list, list) {
+		sta->mc_rx_limit.rate = sdata->mc_rx_limit_rate;
+		sta->bc_rx_limit.rate = sdata->bc_rx_limit_rate;
+
+		/* Calculate the bc/mc receive frame burst size */
+		mc_bc_burst_size(sta);
+	}
+}
+
+static ssize_t ieee80211_if_fmt_mc_bc_rx_limit(
+	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
+{
+	return scnprintf(buf, buflen, "mc: %u\nbc: %u\nburst_size: %u\n",
+			 sdata->mc_rx_limit_rate, sdata->bc_rx_limit_rate,
+			 sdata->burst_size);
+}
+
+static ssize_t ieee80211_if_parse_mc_bc_rx_limit(
+	struct ieee80211_sub_if_data *sdata, const char *buf, int buflen)
+{
+	struct sta_info *sta;
+	int ret;
+	bool mc = false, bc = false;
+	u32 rate = 0, burst_size = 0;
+
+	if (strncmp(buf, "mc:", 3) == 0) {
+		buf += 3;
+		mc = true;
+	} else if (strncmp(buf, "bc:", 3) == 0) {
+		buf += 3;
+		bc = true;
+	} else if (strncmp(buf, "reset", 5) == 0) {
+		/* Disables the multicast and broadcast RX limit logic */
+		sdata->mc_rx_limit_rate = 0;
+		sdata->bc_rx_limit_rate = 0;
+		update_rx_limit(sdata, sta);
+	} else if (strncmp(buf, "burst_size:", 11) == 0) {
+		ret = kstrtouint(buf + 11, 0, &burst_size);
+		if (ret < 0)
+			return ret;
+		sdata->burst_size = burst_size;
+	} else {
+		return -EINVAL;
+	}
+
+	if (mc || bc) {
+		ret = kstrtouint(buf, 0, &rate);
+		if (ret < 0)
+			return ret;
+		/* Allow rates up to 100000 Kbps (100 Mbps) */
+		if (rate <= 100000) {
+			mc > bc ? (sdata->mc_rx_limit_rate = rate) :
+				  (sdata->bc_rx_limit_rate = rate);
+			update_rx_limit(sdata, sta);
+		} else {
+			return -EINVAL;
+		}
+	}
+	return buflen;
+}
+
+IEEE80211_IF_FILE_RW(mc_bc_rx_limit);
 
 /* WDS attributes */
 IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC);
@@ -703,6 +769,7 @@
 	DEBUGFS_ADD(txpower);
 	DEBUGFS_ADD(user_power_level);
 	DEBUGFS_ADD(ap_power_level);
+	DEBUGFS_ADD(mc_bc_rx_limit);
 
 	if (sdata->vif.type != NL80211_IFTYPE_MONITOR)
 		add_common_files(sdata);
diff --git a/net/mac80211-4.2/debugfs_sta.c b/net/mac80211-4.2/debugfs_sta.c
index e1b6bd4..47662b6 100644
--- a/net/mac80211-4.2/debugfs_sta.c
+++ b/net/mac80211-4.2/debugfs_sta.c
@@ -474,6 +474,86 @@
 
 STA_OPS_RW(mc_bc_stats);
 
+static ssize_t sta_mc_bc_rx_limit_read(struct file *file, char __user *userbuf,
+				       size_t count, loff_t *ppos)
+{
+	char buf[100], *p = buf;
+	struct sta_info *sta = file->private_data;
+
+	rcu_read_lock();
+	p += scnprintf(p, sizeof(buf) + buf - p, "mc: %u\nbc: %u\n",
+		       sta->mc_rx_limit.rate, sta->bc_rx_limit.rate);
+
+	rcu_read_unlock();
+
+	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+}
+
+static ssize_t sta_mc_bc_rx_limit_write(struct file *file,
+					const char __user *userbuf,
+					size_t count, loff_t *ppos)
+{
+	char buf[32] = {}, *pbuf = buf;
+	struct sta_info *sta = file->private_data;
+	int ret = 0;
+	bool mc = false, bc = false;
+	u32 rate = 0;
+
+	if (!sta)
+		return -ENOENT;
+
+	if (count > sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(pbuf, userbuf, count))
+		return -EFAULT;
+
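+	/* Ensure the user-supplied string is NUL-terminated */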
+	pbuf[sizeof(buf) - 1] = '\0';
+
+	if (strncmp(pbuf, "mc:", 3) == 0) {
+		pbuf += 3;
+		mc = true;
+	} else if (strncmp(pbuf, "bc:", 3) == 0) {
+		pbuf += 3;
+		bc = true;
+	} else if (strncmp(pbuf, "reset", 5) == 0) {
+		rcu_read_lock();
+		spin_lock_bh(&sta->lock);
+		/* reset both multicast and broadcast to
+		 * interface default rate
+		 */
+		sta->mc_rx_limit.rate = sta->sdata->mc_rx_limit_rate;
+		sta->bc_rx_limit.rate = sta->sdata->bc_rx_limit_rate;
+		/* Calculate the bc/mc receive frame burst size */
+		mc_bc_burst_size(sta);
+		spin_unlock_bh(&sta->lock);
+		rcu_read_unlock();
+	} else {
+		return -EINVAL;
+	}
+
+	if (mc || bc) {
+		ret = kstrtouint(pbuf, 0, &rate);
+		/* Allow rates up to 100000 Kbps (100 Mbps) */
+		if (rate <= 100000) {
+			rcu_read_lock();
+			spin_lock_bh(&sta->lock);
+			mc > bc ? (sta->mc_rx_limit.rate = rate) :
+				  (sta->bc_rx_limit.rate = rate);
+			/* Calculate the bc/mc receive frame burst size */
+			mc_bc_burst_size(sta);
+			spin_unlock_bh(&sta->lock);
+			rcu_read_unlock();
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	return ret ?: count;
+}
+
+STA_OPS_RW(mc_bc_rx_limit);
+
 static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
 				 size_t count, loff_t *ppos)
 {
@@ -796,6 +876,7 @@
 	DEBUGFS_ADD(rx_stats);
 	DEBUGFS_ADD(last_ack_signal);
 	DEBUGFS_ADD(mc_bc_stats);
+	DEBUGFS_ADD(mc_bc_rx_limit);
 
 	DEBUGFS_ADD_COUNTER(rx_duplicates, num_duplicates);
 	DEBUGFS_ADD_COUNTER(rx_fragments, rx_fragments);
diff --git a/net/mac80211-4.2/ieee80211_i.h b/net/mac80211-4.2/ieee80211_i.h
index f564782..e91afbe 100644
--- a/net/mac80211-4.2/ieee80211_i.h
+++ b/net/mac80211-4.2/ieee80211_i.h
@@ -916,6 +916,12 @@
 	bool rc_has_mcs_mask[IEEE80211_NUM_BANDS];
 	u8  rc_rateidx_mcs_mask[IEEE80211_NUM_BANDS][IEEE80211_HT_MCS_MASK_LEN];
 
+	/* multicast and broadcast RX limit rate in Kbps */
+	u32 bc_rx_limit_rate;
+	u32 mc_rx_limit_rate;
+	/* burst size in Bytes */
+	u32 burst_size;
+
 	union {
 		struct ieee80211_if_ap ap;
 		struct ieee80211_if_wds wds;
@@ -2087,6 +2093,8 @@
 					  const u8 *addr);
 void ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata,
 					   struct sk_buff *skb);
+/* Converts the skb length in bytes to nanoseconds at the given rate (in Kbps) */
+s64 skblen_to_ns(u32 rate, unsigned int len);
 
 extern const struct ethtool_ops ieee80211_ethtool_ops;
 
diff --git a/net/mac80211-4.2/iface.c b/net/mac80211-4.2/iface.c
index 4a438f6..f996a44 100644
--- a/net/mac80211-4.2/iface.c
+++ b/net/mac80211-4.2/iface.c
@@ -1824,6 +1824,12 @@
 
 	sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
 
+	/* multicast and broadcast RX limit logic is disabled by default */
+	sdata->mc_rx_limit_rate = 0;
+	sdata->bc_rx_limit_rate = 0;
+	/* set the default burst size to 5 times the maximum frame length */
+	sdata->burst_size = IEEE80211_MAX_FRAME_LEN * 5;
+
 	/* setup type-dependent data */
 	ieee80211_setup_sdata(sdata, type);
 
diff --git a/net/mac80211-4.2/mesh_pathtbl.c b/net/mac80211-4.2/mesh_pathtbl.c
index be9071e..e968e29 100644
--- a/net/mac80211-4.2/mesh_pathtbl.c
+++ b/net/mac80211-4.2/mesh_pathtbl.c
@@ -864,8 +864,8 @@
 		}
 	}
 	rcu_read_unlock();
-	mpath_dbg(sta->sdata, " MESH MPL the link to %pM is broken and %d path deactivated \n",
-			  sta->addr, paths_deactivated);
+	sdata_info(sta->sdata, "MESH MPL the link to %pM is broken and %d paths deactivated\n",
+		   sta->addr, paths_deactivated);
 }
 
 static void mesh_path_node_reclaim(struct rcu_head *rp)
diff --git a/net/mac80211-4.2/rx.c b/net/mac80211-4.2/rx.c
index 9001681..8b5009a 100644
--- a/net/mac80211-4.2/rx.c
+++ b/net/mac80211-4.2/rx.c
@@ -2413,6 +2413,56 @@
 	}
 }
 
+/* Converts the skb length in bytes to nanoseconds at the given rate (in Kbps) */
+s64 skblen_to_ns(u32 rate, unsigned int len)
+{
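+	/* rate is in Kbps: (len * 8 * 1000) / rate is the airtime in
+	 * microseconds; the final multiply by 1000 converts it to nanoseconds.
+	 */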
+	return (((len * 1000 * 8) / rate) * 1000);
+}
+
+static bool mc_bc_rx_limit(struct ieee80211_rx_data *rx,
+			   struct rx_rate_limit *rx_limit)
+{
+	u64 now;
+	s64 toks;
+
+	/* Get the current time in nanoseconds */
+	now = ktime_to_ns(ktime_get());
+
+	/* Replenish tokens according to time elapsed since last receive */
+	toks = min_t(s64, now - rx_limit->t_c, rx_limit->burst_size);
+	toks += rx_limit->tokens;
+
+	/* Limit the available tokens to burst_size */
+	if (toks > rx_limit->burst_size)
+		toks = rx_limit->burst_size;
+
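+	/* Charge this frame's airtime against the accumulated tokens */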
+	toks  -= (s64)skblen_to_ns(rx_limit->rate,  rx->skb->len);
+
+	if (toks >= 0) {
+		rx_limit->t_c = now;
+		rx_limit->tokens = toks;
+		return true;
+	} else {
+		return false;
+	}
+}
+
+static bool ieee80211_mc_bc_rx_limit(char *dst, struct ieee80211_rx_data *rx)
+{
+	struct sta_info *sta = rx->sta;
+
+	if (is_multicast_ether_addr(dst) && sta) {
+		if (is_broadcast_ether_addr(dst)) {
+			if (sta->bc_rx_limit.rate)
+				return mc_bc_rx_limit(rx, &sta->bc_rx_limit);
+		} else {
+			if (sta->mc_rx_limit.rate)
+				return mc_bc_rx_limit(rx, &sta->mc_rx_limit);
+		}
+	}
+	return true;
+}
+
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
 {
@@ -2459,6 +2509,10 @@
 	/* Get the multicast/broadcast stats */
 	ieee80211_mc_bc_stats(((struct ethhdr *)rx->skb->data)->h_dest, rx);
 
+	if (!ieee80211_mc_bc_rx_limit(((struct ethhdr *)rx->skb->data)->h_dest,
+				      rx))
+		return RX_DROP_MONITOR;
+
 	if (!ieee80211_frame_allowed(rx, fc))
 		return RX_DROP_MONITOR;
 
diff --git a/net/mac80211-4.2/sta_info.c b/net/mac80211-4.2/sta_info.c
index 4f76821..779c4736 100644
--- a/net/mac80211-4.2/sta_info.c
+++ b/net/mac80211-4.2/sta_info.c
@@ -297,6 +297,25 @@
 	return 0;
 }
 
+/* Calculate the bc/mc receive frame burst size */
+void mc_bc_burst_size(struct sta_info *sta)
+{
+	/* Convert the interface burst size (bytes, default 5 max-length frames) to nanoseconds at the STA's rate */
+	if (sta->mc_rx_limit.rate)
+		sta->mc_rx_limit.burst_size =
+			skblen_to_ns(sta->mc_rx_limit.rate,
+				     sta->sdata->burst_size);
+	else
+		sta->mc_rx_limit.burst_size = 0;
+
+	if (sta->bc_rx_limit.rate)
+		sta->bc_rx_limit.burst_size =
+			skblen_to_ns(sta->bc_rx_limit.rate,
+				     sta->sdata->burst_size);
+	else
+		sta->bc_rx_limit.burst_size = 0;
+}
+
 struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 				const u8 *addr, gfp_t gfp)
 {
@@ -339,6 +358,13 @@
 	/* Mark TID as unreserved */
 	sta->reserved_tid = IEEE80211_TID_UNRESERVED;
 
+	/* Initialize the STA RX limits from the interface's default
+	 * multicast and broadcast RX rates
+	 */
+	sta->mc_rx_limit.rate = sdata->mc_rx_limit_rate;
+	sta->bc_rx_limit.rate = sdata->bc_rx_limit_rate;
+	/* Calculate the bc/mc receive frame burst size */
+	mc_bc_burst_size(sta);
 	ktime_get_ts(&uptime);
 	sta->last_connected = uptime.tv_sec;
 	ewma_init(&sta->avg_signal, 1024, 8);
diff --git a/net/mac80211-4.2/sta_info.h b/net/mac80211-4.2/sta_info.h
index 84da1760..a0c0f169 100644
--- a/net/mac80211-4.2/sta_info.h
+++ b/net/mac80211-4.2/sta_info.h
@@ -341,6 +341,17 @@
 	u64 bc_bytes;
 };
 
+struct rx_rate_limit {
+	/* token count */
+	s64 tokens;
+	/* The burst limit in ns */
+	s64 burst_size;
+	/* last RX timestamp */
+	u64 t_c;
+	/* The receive rate limit */
+	u32 rate;
+};
+
 /**
  * struct sta_info - STA information
  *
@@ -503,6 +514,7 @@
 	u64 tx_packets[IEEE80211_NUM_ACS];
 	u64 tx_bytes[IEEE80211_NUM_ACS];
 	struct mc_bc_stats mc_bc_stat;
+	struct rx_rate_limit mc_rx_limit, bc_rx_limit;
 	struct ieee80211_tx_rate last_tx_rate;
 	int last_rx_rate_idx;
 	u32 last_rx_rate_flag;
@@ -733,6 +745,9 @@
 			  unsigned long exp_time);
 u8 sta_info_tx_streams(struct sta_info *sta);
 
+/* Calculates the bc/mc receive frame burst size */
+void mc_bc_burst_size(struct sta_info *sta);
+
 void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta);
 void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta);
 void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 182ebf8..3660532 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -224,6 +224,9 @@
 {
 	struct xt_match *match;
 
+	if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
+		return ERR_PTR(-EINVAL);
+
 	match = xt_find_match(nfproto, name, revision);
 	if (IS_ERR(match)) {
 		request_module("%st_%s", xt_prefix[nfproto], name);
@@ -268,6 +271,9 @@
 {
 	struct xt_target *target;
 
+	if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
+		return ERR_PTR(-EINVAL);
+
 	target = xt_find_target(af, name, revision);
 	if (IS_ERR(target)) {
 		request_module("%st_%s", xt_prefix[af], name);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index a4a9f52..7496c14 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1258,11 +1258,14 @@
 
 static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
 {
+	u16 prev_family;
 	int i;
 
 	if (nr > XFRM_MAX_DEPTH)
 		return -EINVAL;
 
+	prev_family = family;
+
 	for (i = 0; i < nr; i++) {
 		/* We never validated the ut->family value, so many
 		 * applications simply leave it at zero.  The check was
@@ -1274,6 +1277,12 @@
 		if (!ut[i].family)
 			ut[i].family = family;
 
+		if ((ut[i].mode == XFRM_MODE_TRANSPORT) &&
+		    (ut[i].family != prev_family))
+			return -EINVAL;
+
+		prev_family = ut[i].family;
+
 		switch (ut[i].family) {
 		case AF_INET:
 			break;
diff --git a/security/chromiumos/alt-syscall.c b/security/chromiumos/alt-syscall.c
index 3638c1e..b4106c51 100644
--- a/security/chromiumos/alt-syscall.c
+++ b/security/chromiumos/alt-syscall.c
@@ -20,50 +20,11 @@
 
 #include <asm/unistd.h>
 
-static int allow_devmode_syscalls;
-
-#ifdef CONFIG_SYSCTL
-static int zero;
-static int one = 1;
-
-static struct ctl_path chromiumos_sysctl_path[] = {
-	{ .procname = "kernel", },
-	{ .procname = "chromiumos", },
-	{ .procname = "alt_syscall", },
-	{ }
-};
-
-static struct ctl_table chromiumos_sysctl_table[] = {
-	{
-		.procname       = "allow_devmode_syscalls",
-		.data           = &allow_devmode_syscalls,
-		.maxlen         = sizeof(int),
-		.mode           = 0644,
-		.proc_handler   = proc_dointvec_minmax,
-		.extra1         = &zero,
-		.extra2         = &one,
-	},
-	{ }
-};
-#endif
-
-struct syscall_whitelist_entry {
-	unsigned int nr;
-	sys_call_ptr_t alt;
-};
-
-struct syscall_whitelist {
-	const char *name;
-	const struct syscall_whitelist_entry *whitelist;
-	unsigned int nr_whitelist;
-#ifdef CONFIG_COMPAT
-	const struct syscall_whitelist_entry *compat_whitelist;
-	unsigned int nr_compat_whitelist;
-#endif
-	bool permissive;
-};
-
-static struct alt_sys_call_table default_table;
+#include "alt-syscall.h"
+#include "android_whitelists.h"
+#include "complete_whitelists.h"
+#include "read_write_test_whitelists.h"
+#include "third_party_whitelists.h"
 
 /* Intercept and log blocked syscalls. */
 static asmlinkage long block_syscall(void)
@@ -123,12 +84,8 @@
 
 	return do_syscall(fn);
 }
-#endif
+#endif /* CONFIG_COMPAT */
 
-/*
- * If an alt_syscall table allows prctl(), override it to prevent a process
- * from changing its syscall table.
- */
 static asmlinkage long alt_sys_prctl(int option, unsigned long arg2,
 				     unsigned long arg3, unsigned long arg4,
 				     unsigned long arg5)
@@ -140,317 +97,6 @@
 	return sys_prctl(option, arg2, arg3, arg4, arg5);
 }
 
-#ifdef CONFIG_COMPAT
-#define SYSCALL_WHITELIST_COMPAT(x)					\
-	.compat_whitelist = x ## _compat_whitelist,			\
-	.nr_compat_whitelist = ARRAY_SIZE(x ## _compat_whitelist),
-#else
-#define SYSCALL_WHITELIST_COMPAT(x)
-#endif
-
-#define SYSCALL_WHITELIST(x)						\
-	{								\
-		.name = #x,						\
-		.whitelist = x ## _whitelist,				\
-		.nr_whitelist = ARRAY_SIZE(x ## _whitelist),		\
-		SYSCALL_WHITELIST_COMPAT(x)				\
-	}
-
-#define PERMISSIVE_SYSCALL_WHITELIST(x)					\
-	{								\
-		.name = #x "_permissive",				\
-		.permissive = true,					\
-		.whitelist = x ## _whitelist,				\
-		.nr_whitelist = ARRAY_SIZE(x ## _whitelist),		\
-		SYSCALL_WHITELIST_COMPAT(x)				\
-	}
-
-#ifdef CONFIG_COMPAT
-#ifdef CONFIG_X86_64
-#define __NR_compat_access	__NR_ia32_access
-#define __NR_compat_adjtimex	__NR_ia32_adjtimex
-#define __NR_compat_brk	__NR_ia32_brk
-#define __NR_compat_capget	__NR_ia32_capget
-#define __NR_compat_capset	__NR_ia32_capset
-#define __NR_compat_chdir	__NR_ia32_chdir
-#define __NR_compat_chmod	__NR_ia32_chmod
-#define __NR_compat_clock_adjtime	__NR_ia32_clock_adjtime
-#define __NR_compat_clock_getres	__NR_ia32_clock_getres
-#define __NR_compat_clock_gettime	__NR_ia32_clock_gettime
-#define __NR_compat_clock_nanosleep	__NR_ia32_clock_nanosleep
-#define __NR_compat_clock_settime	__NR_ia32_clock_settime
-#define __NR_compat_clone	__NR_ia32_clone
-#define __NR_compat_close	__NR_ia32_close
-#define __NR_compat_creat	__NR_ia32_creat
-#define __NR_compat_dup	__NR_ia32_dup
-#define __NR_compat_dup2	__NR_ia32_dup2
-#define __NR_compat_dup3	__NR_ia32_dup3
-#define __NR_compat_epoll_create	__NR_ia32_epoll_create
-#define __NR_compat_epoll_create1	__NR_ia32_epoll_create1
-#define __NR_compat_epoll_ctl	__NR_ia32_epoll_ctl
-#define __NR_compat_epoll_wait	__NR_ia32_epoll_wait
-#define __NR_compat_epoll_pwait	__NR_ia32_epoll_pwait
-#define __NR_compat_eventfd	__NR_ia32_eventfd
-#define __NR_compat_eventfd2	__NR_ia32_eventfd2
-#define __NR_compat_execve	__NR_ia32_execve
-#define __NR_compat_exit	__NR_ia32_exit
-#define __NR_compat_exit_group	__NR_ia32_exit_group
-#define __NR_compat_faccessat	__NR_ia32_faccessat
-#define __NR_compat_fallocate	__NR_ia32_fallocate
-#define __NR_compat_fchdir	__NR_ia32_fchdir
-#define __NR_compat_fchmod	__NR_ia32_fchmod
-#define __NR_compat_fchmodat	__NR_ia32_fchmodat
-#define __NR_compat_fchown	__NR_ia32_fchown
-#define __NR_compat_fchownat	__NR_ia32_fchownat
-#define __NR_compat_fcntl	__NR_ia32_fcntl
-#define __NR_compat_fdatasync	__NR_ia32_fdatasync
-#define __NR_compat_fgetxattr	__NR_ia32_fgetxattr
-#define __NR_compat_flistxattr	__NR_ia32_flistxattr
-#define __NR_compat_flock	__NR_ia32_flock
-#define __NR_compat_fork	__NR_ia32_fork
-#define __NR_compat_fremovexattr	__NR_ia32_fremovexattr
-#define __NR_compat_fsetxattr	__NR_ia32_fsetxattr
-#define __NR_compat_fstat	__NR_ia32_fstat
-#define __NR_compat_fstatfs	__NR_ia32_fstatfs
-#define __NR_compat_fsync	__NR_ia32_fsync
-#define __NR_compat_ftruncate	__NR_ia32_ftruncate
-#define __NR_compat_futex	__NR_ia32_futex
-#define __NR_compat_futimesat	__NR_ia32_futimesat
-#define __NR_compat_getcpu	__NR_ia32_getcpu
-#define __NR_compat_getcwd	__NR_ia32_getcwd
-#define __NR_compat_getdents	__NR_ia32_getdents
-#define __NR_compat_getdents64	__NR_ia32_getdents64
-#define __NR_compat_getegid	__NR_ia32_getegid
-#define __NR_compat_geteuid	__NR_ia32_geteuid
-#define __NR_compat_getgid	__NR_ia32_getgid
-#define __NR_compat_getgroups32	__NR_ia32_getgroups32
-#define __NR_compat_getpgid	__NR_ia32_getpgid
-#define __NR_compat_getpgrp	__NR_ia32_getpgrp
-#define __NR_compat_getpid	__NR_ia32_getpid
-#define __NR_compat_getppid	__NR_ia32_getppid
-#define __NR_compat_getpriority	__NR_ia32_getpriority
-#define __NR_compat_getresgid	__NR_ia32_getresgid
-#define __NR_compat_getresuid	__NR_ia32_getresuid
-#define __NR_compat_getrlimit	__NR_ia32_getrlimit
-#define __NR_compat_getrusage	__NR_ia32_getrusage
-#define __NR_compat_getsid	__NR_ia32_getsid
-#define __NR_compat_gettid	__NR_ia32_gettid
-#define __NR_compat_gettimeofday	__NR_ia32_gettimeofday
-#define __NR_compat_getuid	__NR_ia32_getuid
-#define __NR_compat_getxattr	__NR_ia32_getxattr
-#define __NR_compat_inotify_add_watch	__NR_ia32_inotify_add_watch
-#define __NR_compat_inotify_init	__NR_ia32_inotify_init
-#define __NR_compat_inotify_init1	__NR_ia32_inotify_init1
-#define __NR_compat_inotify_rm_watch	__NR_ia32_inotify_rm_watch
-#define __NR_compat_ioctl	__NR_ia32_ioctl
-#define __NR_compat_ioprio_set	__NR_ia32_ioprio_set
-#define __NR_compat_kill	__NR_ia32_kill
-#define __NR_compat_lgetxattr	__NR_ia32_lgetxattr
-#define __NR_compat_link	__NR_ia32_link
-#define __NR_compat_linkat	__NR_ia32_linkat
-#define __NR_compat_listxattr	__NR_ia32_listxattr
-#define __NR_compat_llistxattr	__NR_ia32_llistxattr
-#define __NR_compat_lremovexattr	__NR_ia32_lremovexattr
-#define __NR_compat_lseek	__NR_ia32_lseek
-#define __NR_compat_lsetxattr	__NR_ia32_lsetxattr
-#define __NR_compat_lstat	__NR_ia32_lstat
-#define __NR_compat_madvise	__NR_ia32_madvise
-#define __NR_compat_mincore	__NR_ia32_mincore
-#define __NR_compat_mkdir	__NR_ia32_mkdir
-#define __NR_compat_mkdirat	__NR_ia32_mkdirat
-#define __NR_compat_mknod	__NR_ia32_mknod
-#define __NR_compat_mknodat	__NR_ia32_mknodat
-#define __NR_compat_mlock	__NR_ia32_mlock
-#define __NR_compat_munlock	__NR_ia32_munlock
-#define __NR_compat_mlockall	__NR_ia32_mlockall
-#define __NR_compat_munlockall	__NR_ia32_munlockall
-#define __NR_compat_modify_ldt	__NR_ia32_modify_ldt
-#define __NR_compat_mount	__NR_ia32_mount
-#define __NR_compat_mprotect	__NR_ia32_mprotect
-#define __NR_compat_mremap	__NR_ia32_mremap
-#define __NR_compat_msync	__NR_ia32_msync
-#define __NR_compat_munmap	__NR_ia32_munmap
-#define __NR_compat_name_to_handle_at	__NR_ia32_name_to_handle_at
-#define __NR_compat_nanosleep	__NR_ia32_nanosleep
-#define __NR_compat_open	__NR_ia32_open
-#define __NR_compat_open_by_handle_at	__NR_ia32_open_by_handle_at
-#define __NR_compat_openat	__NR_ia32_openat
-#define __NR_compat_perf_event_open	__NR_ia32_perf_event_open
-#define __NR_compat_personality	__NR_ia32_personality
-#define __NR_compat_pipe	__NR_ia32_pipe
-#define __NR_compat_pipe2	__NR_ia32_pipe2
-#define __NR_compat_poll	__NR_ia32_poll
-#define __NR_compat_ppoll	__NR_ia32_ppoll
-#define __NR_compat_prctl	__NR_ia32_prctl
-#define __NR_compat_pread64	__NR_ia32_pread64
-#define __NR_compat_preadv	__NR_ia32_preadv
-#define __NR_compat_prlimit64	__NR_ia32_prlimit64
-#define __NR_compat_process_vm_readv	__NR_ia32_process_vm_readv
-#define __NR_compat_process_vm_writev	__NR_ia32_process_vm_writev
-#define __NR_compat_pselect6	__NR_ia32_pselect6
-#define __NR_compat_ptrace	__NR_ia32_ptrace
-#define __NR_compat_pwrite64	__NR_ia32_pwrite64
-#define __NR_compat_pwritev	__NR_ia32_pwritev
-#define __NR_compat_read	__NR_ia32_read
-#define __NR_compat_readahead	__NR_ia32_readahead
-#define __NR_compat_readv	__NR_ia32_readv
-#define __NR_compat_readlink	__NR_ia32_readlink
-#define __NR_compat_readlinkat	__NR_ia32_readlinkat
-#define __NR_compat_recvmmsg	__NR_ia32_recvmmsg
-#define __NR_compat_remap_file_pages	__NR_ia32_remap_file_pages
-#define __NR_compat_removexattr	__NR_ia32_removexattr
-#define __NR_compat_rename	__NR_ia32_rename
-#define __NR_compat_renameat	__NR_ia32_renameat
-#define __NR_compat_restart_syscall	__NR_ia32_restart_syscall
-#define __NR_compat_rmdir	__NR_ia32_rmdir
-#define __NR_compat_rt_sigaction	__NR_ia32_rt_sigaction
-#define __NR_compat_rt_sigpending	__NR_ia32_rt_sigpending
-#define __NR_compat_rt_sigprocmask	__NR_ia32_rt_sigprocmask
-#define __NR_compat_rt_sigqueueinfo	__NR_ia32_rt_sigqueueinfo
-#define __NR_compat_rt_sigreturn	__NR_ia32_rt_sigreturn
-#define __NR_compat_rt_sigsuspend	__NR_ia32_rt_sigsuspend
-#define __NR_compat_rt_sigtimedwait	__NR_ia32_rt_sigtimedwait
-#define __NR_compat_rt_tgsigqueueinfo	__NR_ia32_rt_tgsigqueueinfo
-#define __NR_compat_sched_get_priority_max	__NR_ia32_sched_get_priority_max
-#define __NR_compat_sched_get_priority_min	__NR_ia32_sched_get_priority_min
-#define __NR_compat_sched_getaffinity	__NR_ia32_sched_getaffinity
-#define __NR_compat_sched_getparam	__NR_ia32_sched_getparam
-#define __NR_compat_sched_getscheduler	__NR_ia32_sched_getscheduler
-#define __NR_compat_sched_setaffinity	__NR_ia32_sched_setaffinity
-#define __NR_compat_sched_setscheduler	__NR_ia32_sched_setscheduler
-#define __NR_compat_sched_yield	__NR_ia32_sched_yield
-#define __NR_compat_seccomp	__NR_ia32_seccomp
-#define __NR_compat_sendfile	__NR_ia32_sendfile
-#define __NR_compat_sendfile64	__NR_ia32_sendfile64
-#define __NR_compat_sendmmsg	__NR_ia32_sendmmsg
-#define __NR_compat_set_robust_list	__NR_ia32_set_robust_list
-#define __NR_compat_set_tid_address	__NR_ia32_set_tid_address
-#define __NR_compat_set_thread_area	__NR_ia32_set_thread_area
-#define __NR_compat_setgid	__NR_ia32_setgid
-#define __NR_compat_setgroups	__NR_ia32_setgroups
-#define __NR_compat_setitimer	__NR_ia32_setitimer
-#define __NR_compat_setns	__NR_ia32_setns
-#define __NR_compat_setpgid	__NR_ia32_setpgid
-#define __NR_compat_setpriority	__NR_ia32_setpriority
-#define __NR_compat_setregid	__NR_ia32_setregid
-#define __NR_compat_setresgid	__NR_ia32_setresgid
-#define __NR_compat_setresuid	__NR_ia32_setresuid
-#define __NR_compat_setrlimit	__NR_ia32_setrlimit
-#define __NR_compat_setsid	__NR_ia32_setsid
-#define __NR_compat_settimeofday	__NR_ia32_settimeofday
-#define __NR_compat_setuid	__NR_ia32_setuid
-#define __NR_compat_setxattr	__NR_ia32_setxattr
-#define __NR_compat_signalfd4	__NR_ia32_signalfd4
-#define __NR_compat_sigaltstack	__NR_ia32_sigaltstack
-#define __NR_compat_socketcall	__NR_ia32_socketcall
-#define __NR_compat_splice	__NR_ia32_splice
-#define __NR_compat_stat	__NR_ia32_stat
-#define __NR_compat_statfs	__NR_ia32_statfs
-#define __NR_compat_symlink	__NR_ia32_symlink
-#define __NR_compat_symlinkat	__NR_ia32_symlinkat
-#define __NR_compat_sync_file_range	__NR_ia32_sync_file_range
-#define __NR_compat_sysinfo	__NR_ia32_sysinfo
-#define __NR_compat_syslog	__NR_ia32_syslog
-#define __NR_compat_tee		__NR_ia32_tee
-#define __NR_compat_tgkill	__NR_ia32_tgkill
-#define __NR_compat_tkill	__NR_ia32_tkill
-#define __NR_compat_time	__NR_ia32_time
-#define __NR_compat_timer_create	__NR_ia32_timer_create
-#define __NR_compat_timer_delete	__NR_ia32_timer_delete
-#define __NR_compat_timer_getoverrun	__NR_ia32_timer_getoverrun
-#define __NR_compat_timer_gettime	__NR_ia32_timer_gettime
-#define __NR_compat_timer_settime	__NR_ia32_timer_settime
-#define __NR_compat_timerfd_create	__NR_ia32_timerfd_create
-#define __NR_compat_timerfd_gettime	__NR_ia32_timerfd_gettime
-#define __NR_compat_timerfd_settime	__NR_ia32_timerfd_settime
-#define __NR_compat_times		__NR_ia32_times
-#define __NR_compat_truncate	__NR_ia32_truncate
-#define __NR_compat_umask	__NR_ia32_umask
-#define __NR_compat_umount2	__NR_ia32_umount2
-#define __NR_compat_uname	__NR_ia32_uname
-#define __NR_compat_unlink	__NR_ia32_unlink
-#define __NR_compat_unlinkat	__NR_ia32_unlinkat
-#define __NR_compat_unshare	__NR_ia32_unshare
-#define __NR_compat_ustat	__NR_ia32_ustat
-#define __NR_compat_utimensat	__NR_ia32_utimensat
-#define __NR_compat_utimes	__NR_ia32_utimes
-#define __NR_compat_vfork	__NR_ia32_vfork
-#define __NR_compat_vmsplice	__NR_ia32_vmsplice
-#define __NR_compat_wait4	__NR_ia32_wait4
-#define __NR_compat_waitid	__NR_ia32_waitid
-#define __NR_compat_waitpid	__NR_ia32_waitpid
-#define __NR_compat_write	__NR_ia32_write
-#define __NR_compat_writev	__NR_ia32_writev
-#define __NR_compat_chown32	__NR_ia32_chown32
-#define __NR_compat_fadvise64	__NR_ia32_fadvise64
-#define __NR_compat_fadvise64_64	__NR_ia32_fadvise64_64
-#define __NR_compat_fchown32	__NR_ia32_fchown32
-#define __NR_compat_fcntl64	__NR_ia32_fcntl64
-#define __NR_compat_fstat64	__NR_ia32_fstat64
-#define __NR_compat_fstatat64	__NR_ia32_fstatat64
-#define __NR_compat_fstatfs64	__NR_ia32_fstatfs64
-#define __NR_compat_ftruncate64	__NR_ia32_ftruncate64
-#define __NR_compat_getegid32	__NR_ia32_getegid32
-#define __NR_compat_geteuid32	__NR_ia32_geteuid32
-#define __NR_compat_getgid32	__NR_ia32_getgid32
-#define __NR_compat_getresgid32	__NR_ia32_getresgid32
-#define __NR_compat_getresuid32	__NR_ia32_getresuid32
-#define __NR_compat_getuid32	__NR_ia32_getuid32
-#define __NR_compat_lchown32	__NR_ia32_lchown32
-#define __NR_compat_lstat64	__NR_ia32_lstat64
-#define __NR_compat_mmap2	__NR_ia32_mmap2
-#define __NR_compat__newselect	__NR_ia32__newselect
-#define __NR_compat__llseek	__NR_ia32__llseek
-#define __NR_compat_sigaction	__NR_ia32_sigaction
-#define __NR_compat_sigpending	__NR_ia32_sigpending
-#define __NR_compat_sigprocmask	__NR_ia32_sigprocmask
-#define __NR_compat_sigreturn	__NR_ia32_sigreturn
-#define __NR_compat_sigsuspend	__NR_ia32_sigsuspend
-#define __NR_compat_setgid32	__NR_ia32_setgid32
-#define __NR_compat_setgroups32	__NR_ia32_setgroups32
-#define __NR_compat_setregid32	__NR_ia32_setregid32
-#define __NR_compat_setresgid32	__NR_ia32_setresgid32
-#define __NR_compat_setresuid32	__NR_ia32_setresuid32
-#define __NR_compat_setreuid32	__NR_ia32_setreuid32
-#define __NR_compat_setuid32	__NR_ia32_setuid32
-#define __NR_compat_stat64	__NR_ia32_stat64
-#define __NR_compat_statfs64	__NR_ia32_statfs64
-#define __NR_compat_truncate64	__NR_ia32_truncate64
-#define __NR_compat_ugetrlimit	__NR_ia32_ugetrlimit
-#endif
-#endif
-
-#define SYSCALL_ENTRY_ALT(name, func)					\
-	{								\
-		.nr = __NR_ ## name,					\
-		.alt = (sys_call_ptr_t)func,				\
-	}
-#define SYSCALL_ENTRY(name) SYSCALL_ENTRY_ALT(name, NULL)
-#define COMPAT_SYSCALL_ENTRY_ALT(name, func)				\
-	{								\
-		.nr = __NR_compat_ ## name,				\
-		.alt = (sys_call_ptr_t)func,				\
-	}
-#define COMPAT_SYSCALL_ENTRY(name) COMPAT_SYSCALL_ENTRY_ALT(name, NULL)
-
-static struct syscall_whitelist_entry read_write_test_whitelist[] = {
-	SYSCALL_ENTRY(exit),
-	SYSCALL_ENTRY(openat),
-	SYSCALL_ENTRY(close),
-	SYSCALL_ENTRY(read),
-	SYSCALL_ENTRY(write),
-	SYSCALL_ENTRY_ALT(prctl, alt_sys_prctl),
-
-	/* open(2) is deprecated and not wired up on ARM64. */
-#ifndef CONFIG_ARM64
-	SYSCALL_ENTRY(open),
-#endif
-};
-
-/*
- * Syscall overrides for android.
- */
-
 /* Thread priority used by Android. */
 #define ANDROID_PRIORITY_FOREGROUND     -2
 #define ANDROID_PRIORITY_DISPLAY        -4
@@ -468,9 +114,8 @@
 #define CONTAINER_PRIORITY_HIGHEST       -10
 
 /*
- * Reflect the priority adjustment done by android_setpriority.
- * Note that the prio returned by getpriority has been offset by 20.
- * (returns 40..1 instead of -20..19)
+ * TODO(mortonm): Move the implementation of these Android-specific
+ * alt-syscalls (starting with android_*) to their own .c file.
  */
 static asmlinkage long android_getpriority(int which, int who)
 {
@@ -505,7 +150,6 @@
 	return -nice + 20;
 }
 
-/* Make sure nothing sets a nice value more favorable than -10. */
 static asmlinkage long android_setpriority(int which, int who, int niceval)
 {
 	if (niceval < 0) {
@@ -605,479 +249,7 @@
 	return sys_getcpu(cpu, node, tcache);
 }
 
-static struct syscall_whitelist_entry android_whitelist[] = {
-	SYSCALL_ENTRY_ALT(adjtimex, android_adjtimex),
-	SYSCALL_ENTRY(brk),
-	SYSCALL_ENTRY(capget),
-	SYSCALL_ENTRY(capset),
-	SYSCALL_ENTRY(chdir),
-	SYSCALL_ENTRY_ALT(clock_adjtime, android_clock_adjtime),
-	SYSCALL_ENTRY(clock_getres),
-	SYSCALL_ENTRY(clock_gettime),
-	SYSCALL_ENTRY(clock_nanosleep),
-	SYSCALL_ENTRY(clock_settime),
-	SYSCALL_ENTRY(clone),
-	SYSCALL_ENTRY(close),
-	SYSCALL_ENTRY(dup),
-	SYSCALL_ENTRY(dup3),
-	SYSCALL_ENTRY(epoll_create1),
-	SYSCALL_ENTRY(epoll_ctl),
-	SYSCALL_ENTRY(epoll_pwait),
-	SYSCALL_ENTRY(eventfd2),
-	SYSCALL_ENTRY(execve),
-	SYSCALL_ENTRY(exit),
-	SYSCALL_ENTRY(exit_group),
-	SYSCALL_ENTRY(faccessat),
-	SYSCALL_ENTRY(fallocate),
-	SYSCALL_ENTRY(fchdir),
-	SYSCALL_ENTRY(fchmod),
-	SYSCALL_ENTRY(fchmodat),
-	SYSCALL_ENTRY(fchownat),
-	SYSCALL_ENTRY(fcntl),
-	SYSCALL_ENTRY(fdatasync),
-	SYSCALL_ENTRY(fgetxattr),
-	SYSCALL_ENTRY(flistxattr),
-	SYSCALL_ENTRY(flock),
-	SYSCALL_ENTRY(fremovexattr),
-	SYSCALL_ENTRY(fsetxattr),
-	SYSCALL_ENTRY(fstat),
-	SYSCALL_ENTRY(fstatfs),
-	SYSCALL_ENTRY(fsync),
-	SYSCALL_ENTRY(ftruncate),
-	SYSCALL_ENTRY(futex),
-	SYSCALL_ENTRY_ALT(getcpu, android_getcpu),
-	SYSCALL_ENTRY(getcwd),
-	SYSCALL_ENTRY(getdents64),
-	SYSCALL_ENTRY(getpgid),
-	SYSCALL_ENTRY(getpid),
-	SYSCALL_ENTRY(getppid),
-	SYSCALL_ENTRY_ALT(getpriority, android_getpriority),
-	SYSCALL_ENTRY(getrlimit),
-	SYSCALL_ENTRY(getrusage),
-	SYSCALL_ENTRY(getsid),
-	SYSCALL_ENTRY(gettid),
-	SYSCALL_ENTRY(gettimeofday),
-	SYSCALL_ENTRY(getxattr),
-	SYSCALL_ENTRY(inotify_add_watch),
-	SYSCALL_ENTRY(inotify_init1),
-	SYSCALL_ENTRY(inotify_rm_watch),
-	SYSCALL_ENTRY(ioctl),
-	SYSCALL_ENTRY(ioprio_set),
-	SYSCALL_ENTRY(kill),
-	SYSCALL_ENTRY(lgetxattr),
-	SYSCALL_ENTRY(linkat),
-	SYSCALL_ENTRY(listxattr),
-	SYSCALL_ENTRY(llistxattr),
-	SYSCALL_ENTRY(lremovexattr),
-	SYSCALL_ENTRY(lseek),
-	SYSCALL_ENTRY(lsetxattr),
-	SYSCALL_ENTRY(madvise),
-	SYSCALL_ENTRY(mincore),
-	SYSCALL_ENTRY(mkdirat),
-	SYSCALL_ENTRY(mknodat),
-	SYSCALL_ENTRY(mlock),
-	SYSCALL_ENTRY(mlockall),
-	SYSCALL_ENTRY(munlock),
-	SYSCALL_ENTRY(munlockall),
-	SYSCALL_ENTRY(mount),
-	SYSCALL_ENTRY(mprotect),
-	SYSCALL_ENTRY(mremap),
-	SYSCALL_ENTRY(msync),
-	SYSCALL_ENTRY(munmap),
-	SYSCALL_ENTRY(name_to_handle_at),
-	SYSCALL_ENTRY(nanosleep),
-	SYSCALL_ENTRY(open_by_handle_at),
-	SYSCALL_ENTRY(openat),
-	SYSCALL_ENTRY_ALT(perf_event_open, android_perf_event_open),
-	SYSCALL_ENTRY(personality),
-	SYSCALL_ENTRY(pipe2),
-	SYSCALL_ENTRY(ppoll),
-	SYSCALL_ENTRY_ALT(prctl, alt_sys_prctl),
-	SYSCALL_ENTRY(pread64),
-	SYSCALL_ENTRY(preadv),
-	SYSCALL_ENTRY(prlimit64),
-	SYSCALL_ENTRY(process_vm_readv),
-	SYSCALL_ENTRY(process_vm_writev),
-	SYSCALL_ENTRY(pselect6),
-	SYSCALL_ENTRY(ptrace),
-	SYSCALL_ENTRY(pwrite64),
-	SYSCALL_ENTRY(pwritev),
-	SYSCALL_ENTRY(read),
-	SYSCALL_ENTRY(readahead),
-	SYSCALL_ENTRY(readv),
-	SYSCALL_ENTRY(readlinkat),
-	SYSCALL_ENTRY(recvmmsg),
-	SYSCALL_ENTRY(remap_file_pages),
-	SYSCALL_ENTRY(removexattr),
-	SYSCALL_ENTRY(renameat),
-	SYSCALL_ENTRY(restart_syscall),
-	SYSCALL_ENTRY(rt_sigaction),
-	SYSCALL_ENTRY(rt_sigpending),
-	SYSCALL_ENTRY(rt_sigprocmask),
-	SYSCALL_ENTRY(rt_sigqueueinfo),
-	SYSCALL_ENTRY(rt_sigreturn),
-	SYSCALL_ENTRY(rt_sigsuspend),
-	SYSCALL_ENTRY(rt_sigtimedwait),
-	SYSCALL_ENTRY(rt_tgsigqueueinfo),
-	SYSCALL_ENTRY(sched_get_priority_max),
-	SYSCALL_ENTRY(sched_get_priority_min),
-	SYSCALL_ENTRY(sched_getaffinity),
-	SYSCALL_ENTRY(sched_getparam),
-	SYSCALL_ENTRY(sched_getscheduler),
-	SYSCALL_ENTRY(sched_setaffinity),
-	SYSCALL_ENTRY_ALT(sched_setscheduler, android_sched_setscheduler),
-	SYSCALL_ENTRY(sched_yield),
-	SYSCALL_ENTRY(seccomp),
-	SYSCALL_ENTRY(sendfile),
-	SYSCALL_ENTRY(sendmmsg),
-	SYSCALL_ENTRY(set_robust_list),
-	SYSCALL_ENTRY(set_tid_address),
-	SYSCALL_ENTRY(setitimer),
-	SYSCALL_ENTRY(setns),
-	SYSCALL_ENTRY(setpgid),
-	SYSCALL_ENTRY_ALT(setpriority, android_setpriority),
-	SYSCALL_ENTRY(setrlimit),
-	SYSCALL_ENTRY(setsid),
-	SYSCALL_ENTRY(settimeofday),
-	SYSCALL_ENTRY(setxattr),
-	SYSCALL_ENTRY(signalfd4),
-	SYSCALL_ENTRY(sigaltstack),
-	SYSCALL_ENTRY(splice),
-	SYSCALL_ENTRY(statfs),
-	SYSCALL_ENTRY(symlinkat),
-	SYSCALL_ENTRY(sysinfo),
-	SYSCALL_ENTRY(syslog),
-	SYSCALL_ENTRY(tee),
-	SYSCALL_ENTRY(tgkill),
-	SYSCALL_ENTRY(tkill),
-	SYSCALL_ENTRY(time),
-	SYSCALL_ENTRY(timer_create),
-	SYSCALL_ENTRY(timer_delete),
-	SYSCALL_ENTRY(timer_gettime),
-	SYSCALL_ENTRY(timer_getoverrun),
-	SYSCALL_ENTRY(timer_settime),
-	SYSCALL_ENTRY(timerfd_create),
-	SYSCALL_ENTRY(timerfd_gettime),
-	SYSCALL_ENTRY(timerfd_settime),
-	SYSCALL_ENTRY(times),
-	SYSCALL_ENTRY(truncate),
-	SYSCALL_ENTRY(umask),
-	SYSCALL_ENTRY(umount2),
-	SYSCALL_ENTRY(uname),
-	SYSCALL_ENTRY(unlinkat),
-	SYSCALL_ENTRY(unshare),
-	SYSCALL_ENTRY(utimensat),
-	SYSCALL_ENTRY(vmsplice),
-	SYSCALL_ENTRY(wait4),
-	SYSCALL_ENTRY(waitid),
-	SYSCALL_ENTRY(write),
-	SYSCALL_ENTRY(writev),
-
-	/*
-	 * Deprecated syscalls which are not wired up on new architectures
-	 * such as ARM64.
-	 */
-#ifndef CONFIG_ARM64
-	SYSCALL_ENTRY(access),
-	SYSCALL_ENTRY(chmod),
-	SYSCALL_ENTRY(open),
-	SYSCALL_ENTRY(creat),
-	SYSCALL_ENTRY(dup2),
-	SYSCALL_ENTRY(epoll_create),
-	SYSCALL_ENTRY(epoll_wait),
-	SYSCALL_ENTRY(eventfd),
-	SYSCALL_ENTRY(fork),
-	SYSCALL_ENTRY(futimesat),
-	SYSCALL_ENTRY(getdents),
-	SYSCALL_ENTRY(getpgrp),
-	SYSCALL_ENTRY(inotify_init),
-	SYSCALL_ENTRY(link),
-	SYSCALL_ENTRY(lstat),
-	SYSCALL_ENTRY(mkdir),
-	SYSCALL_ENTRY(mknod),
-	SYSCALL_ENTRY(pipe),
-	SYSCALL_ENTRY(poll),
-	SYSCALL_ENTRY(readlink),
-	SYSCALL_ENTRY(rename),
-	SYSCALL_ENTRY(rmdir),
-	SYSCALL_ENTRY(stat),
-	SYSCALL_ENTRY(symlink),
-	SYSCALL_ENTRY(unlink),
-	SYSCALL_ENTRY(ustat),
-	SYSCALL_ENTRY(utimes),
-	SYSCALL_ENTRY(vfork),
-#endif
-
-	/*
-	 * waitpid(2) is deprecated on most architectures, but still exists
-	 * on IA32.
-	 */
-#ifdef CONFIG_X86_32
-	SYSCALL_ENTRY(waitpid),
-#endif
-
-	/* IA32 uses the common socketcall(2) entrypoint for socket calls. */
-#ifdef CONFIG_X86_32
-	SYSCALL_ENTRY(socketcall),
-#else
-	SYSCALL_ENTRY(accept),
-	SYSCALL_ENTRY(accept4),
-	SYSCALL_ENTRY(bind),
-	SYSCALL_ENTRY(connect),
-	SYSCALL_ENTRY(getpeername),
-	SYSCALL_ENTRY(getsockname),
-	SYSCALL_ENTRY(getsockopt),
-	SYSCALL_ENTRY(listen),
-	SYSCALL_ENTRY(recvfrom),
-	SYSCALL_ENTRY(recvmsg),
-	SYSCALL_ENTRY(sendmsg),
-	SYSCALL_ENTRY(sendto),
-	SYSCALL_ENTRY(setsockopt),
-	SYSCALL_ENTRY(shutdown),
-	SYSCALL_ENTRY(socket),
-	SYSCALL_ENTRY(socketpair),
-	/*
-	 * recv(2)/send(2) are officially deprecated, but their entry-points
-	 * still exist on ARM.
-	 */
-#ifdef CONFIG_ARM
-	SYSCALL_ENTRY(recv),
-	SYSCALL_ENTRY(send),
-#endif
-#endif
-
-	/*
-	 * posix_fadvise(2) and sync_file_range(2) have ARM-specific wrappers
-	 * to deal with register alignment.
-	 */
-#ifdef CONFIG_ARM
-	SYSCALL_ENTRY(arm_fadvise64_64),
-	SYSCALL_ENTRY(sync_file_range2),
-#else
-#ifdef CONFIG_X86_32
-	SYSCALL_ENTRY(fadvise64_64),
-#endif
-	SYSCALL_ENTRY(fadvise64),
-	SYSCALL_ENTRY(sync_file_range),
-#endif
-
-	/* 64-bit only syscalls. */
-#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
-	SYSCALL_ENTRY(fchown),
-	SYSCALL_ENTRY(getegid),
-	SYSCALL_ENTRY(geteuid),
-	SYSCALL_ENTRY(getgid),
-	SYSCALL_ENTRY(getgroups),
-	SYSCALL_ENTRY(getresgid),
-	SYSCALL_ENTRY(getresuid),
-	SYSCALL_ENTRY(getuid),
-	SYSCALL_ENTRY(newfstatat),
-	SYSCALL_ENTRY(mmap),
-	SYSCALL_ENTRY(setgid),
-	SYSCALL_ENTRY(setgroups),
-	SYSCALL_ENTRY(setregid),
-	SYSCALL_ENTRY(setresgid),
-	SYSCALL_ENTRY(setresuid),
-	SYSCALL_ENTRY(setreuid),
-	SYSCALL_ENTRY(setuid),
-	/*
-	 * chown(2), lchown(2), and select(2) are deprecated and not wired up
-	 * on ARM64.
-	 */
-#ifndef CONFIG_ARM64
-	SYSCALL_ENTRY(chown),
-	SYSCALL_ENTRY(lchown),
-	SYSCALL_ENTRY(select),
-#endif
-#endif
-
-	/* 32-bit only syscalls. */
-#if defined(CONFIG_ARM) || defined(CONFIG_X86_32)
-	SYSCALL_ENTRY(chown32),
-	SYSCALL_ENTRY(fchown32),
-	SYSCALL_ENTRY(fcntl64),
-	SYSCALL_ENTRY(fstat64),
-	SYSCALL_ENTRY(fstatat64),
-	SYSCALL_ENTRY(fstatfs64),
-	SYSCALL_ENTRY(ftruncate64),
-	SYSCALL_ENTRY(getegid32),
-	SYSCALL_ENTRY(geteuid32),
-	SYSCALL_ENTRY(getgid32),
-	SYSCALL_ENTRY(getgroups32),
-	SYSCALL_ENTRY(getresgid32),
-	SYSCALL_ENTRY(getresuid32),
-	SYSCALL_ENTRY(getuid32),
-	SYSCALL_ENTRY(lchown32),
-	SYSCALL_ENTRY(lstat64),
-	SYSCALL_ENTRY(mmap2),
-	SYSCALL_ENTRY(_newselect),
-	SYSCALL_ENTRY(_llseek),
-	SYSCALL_ENTRY(sigaction),
-	SYSCALL_ENTRY(sigpending),
-	SYSCALL_ENTRY(sigprocmask),
-	SYSCALL_ENTRY(sigreturn),
-	SYSCALL_ENTRY(sigsuspend),
-	SYSCALL_ENTRY(sendfile64),
-	SYSCALL_ENTRY(setgid32),
-	SYSCALL_ENTRY(setgroups32),
-	SYSCALL_ENTRY(setregid32),
-	SYSCALL_ENTRY(setresgid32),
-	SYSCALL_ENTRY(setresuid32),
-	SYSCALL_ENTRY(setreuid32),
-	SYSCALL_ENTRY(setuid32),
-	SYSCALL_ENTRY(stat64),
-	SYSCALL_ENTRY(statfs64),
-	SYSCALL_ENTRY(truncate64),
-	SYSCALL_ENTRY(ugetrlimit),
-#endif
-
-	/* X86-specific syscalls. */
-#ifdef CONFIG_X86
-	SYSCALL_ENTRY(modify_ldt),
-	SYSCALL_ENTRY(set_thread_area),
-#endif
-
-#ifdef CONFIG_X86_64
-	SYSCALL_ENTRY(arch_prctl),
-#endif
-
-}; /* end android whitelist */
-
-static struct syscall_whitelist_entry third_party_whitelist[] = {
-	SYSCALL_ENTRY(brk),
-	SYSCALL_ENTRY(chdir),
-	SYSCALL_ENTRY(clock_gettime),
-	SYSCALL_ENTRY(clone),
-	SYSCALL_ENTRY(close),
-	SYSCALL_ENTRY(dup),
-	SYSCALL_ENTRY(execve),
-	SYSCALL_ENTRY(exit),
-	SYSCALL_ENTRY(exit_group),
-	SYSCALL_ENTRY(fcntl),
-	SYSCALL_ENTRY(fstat),
-	SYSCALL_ENTRY(futex),
-	SYSCALL_ENTRY(getcwd),
-	SYSCALL_ENTRY(getdents64),
-	SYSCALL_ENTRY(getpid),
-	SYSCALL_ENTRY(getpgid),
-	SYSCALL_ENTRY(getppid),
-	SYSCALL_ENTRY(getpriority),
-	SYSCALL_ENTRY(getrlimit),
-	SYSCALL_ENTRY(getsid),
-	SYSCALL_ENTRY(gettimeofday),
-	SYSCALL_ENTRY(ioctl),
-	SYSCALL_ENTRY(lseek),
-	SYSCALL_ENTRY(madvise),
-	SYSCALL_ENTRY(mprotect),
-	SYSCALL_ENTRY(munmap),
-	SYSCALL_ENTRY(nanosleep),
-	SYSCALL_ENTRY(openat),
-	SYSCALL_ENTRY(prlimit64),
-	SYSCALL_ENTRY(read),
-	SYSCALL_ENTRY(rt_sigaction),
-	SYSCALL_ENTRY(rt_sigprocmask),
-	SYSCALL_ENTRY(rt_sigreturn),
-	SYSCALL_ENTRY(sendfile),
-	SYSCALL_ENTRY(set_robust_list),
-	SYSCALL_ENTRY(set_tid_address),
-	SYSCALL_ENTRY(setpgid),
-	SYSCALL_ENTRY(setpriority),
-	SYSCALL_ENTRY(setsid),
-	SYSCALL_ENTRY(syslog),
-	SYSCALL_ENTRY(statfs),
-	SYSCALL_ENTRY(umask),
-	SYSCALL_ENTRY(uname),
-	SYSCALL_ENTRY(wait4),
-	SYSCALL_ENTRY(write),
-	SYSCALL_ENTRY(writev),
-
-	/*
-	 * Deprecated syscalls which are not wired up on new architectures
-	 * such as ARM64.
-	 */
-#ifndef CONFIG_ARM64
-	SYSCALL_ENTRY(access),
-	SYSCALL_ENTRY(creat),
-	SYSCALL_ENTRY(dup2),
-	SYSCALL_ENTRY(getdents),
-	SYSCALL_ENTRY(getpgrp),
-	SYSCALL_ENTRY(lstat),
-	SYSCALL_ENTRY(mkdir),
-	SYSCALL_ENTRY(open),
-	SYSCALL_ENTRY(pipe),
-	SYSCALL_ENTRY(poll),
-	SYSCALL_ENTRY(readlink),
-	SYSCALL_ENTRY(stat),
-	SYSCALL_ENTRY(unlink),
-#endif
-
-	/* 32-bit only syscalls. */
-#if defined(CONFIG_ARM) || defined(CONFIG_X86_32)
-	SYSCALL_ENTRY(fcntl64),
-	SYSCALL_ENTRY(fstat64),
-	SYSCALL_ENTRY(geteuid32),
-	SYSCALL_ENTRY(getuid32),
-	SYSCALL_ENTRY(_llseek),
-	SYSCALL_ENTRY(lstat64),
-	SYSCALL_ENTRY(_newselect),
-	SYSCALL_ENTRY(mmap2),
-	SYSCALL_ENTRY(stat64),
-	SYSCALL_ENTRY(ugetrlimit),
-#endif
-
-
-	/* IA32 uses the common socketcall(2) entrypoint for socket calls. */
-#ifdef CONFIG_X86_32
-	SYSCALL_ENTRY(socketcall),
-#else
-	SYSCALL_ENTRY(accept),
-	SYSCALL_ENTRY(bind),
-	SYSCALL_ENTRY(connect),
-	SYSCALL_ENTRY(listen),
-	SYSCALL_ENTRY(recvfrom),
-	SYSCALL_ENTRY(recvmsg),
-	SYSCALL_ENTRY(sendmsg),
-	SYSCALL_ENTRY(sendto),
-	SYSCALL_ENTRY(setsockopt),
-	SYSCALL_ENTRY(socket),
-	SYSCALL_ENTRY(socketpair),
-#endif
-
-	/* 64-bit only syscalls. */
-#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
-	SYSCALL_ENTRY(getegid),
-	SYSCALL_ENTRY(geteuid),
-	SYSCALL_ENTRY(getgid),
-	SYSCALL_ENTRY(getuid),
-	SYSCALL_ENTRY(mmap),
-	SYSCALL_ENTRY(setgid),
-	SYSCALL_ENTRY(setuid),
-	/*
-	 * chown(2), lchown(2), and select(2) are deprecated and not wired up
-	 * on ARM64.
-	 */
-#ifndef CONFIG_ARM64
-	SYSCALL_ENTRY(select),
-#endif
-#endif
-
-	/* X86-specific syscalls. */
-#ifdef CONFIG_X86
-	SYSCALL_ENTRY(arch_prctl),
-#endif
-};
-
-
 #ifdef CONFIG_COMPAT
-static struct syscall_whitelist_entry read_write_test_compat_whitelist[] = {
-	COMPAT_SYSCALL_ENTRY(exit),
-	COMPAT_SYSCALL_ENTRY(open),
-	COMPAT_SYSCALL_ENTRY(close),
-	COMPAT_SYSCALL_ENTRY(read),
-	COMPAT_SYSCALL_ENTRY(write),
-	COMPAT_SYSCALL_ENTRY_ALT(prctl, alt_sys_prctl),
-};
-
 static asmlinkage long android_compat_adjtimex(struct compat_timex __user *buf)
 {
 	struct compat_timex kbuf;
@@ -1103,417 +275,16 @@
 		return -EPERM;
 	return compat_sys_clock_adjtime(which_clock, buf);
 }
-
-static struct syscall_whitelist_entry android_compat_whitelist[] = {
-	COMPAT_SYSCALL_ENTRY(access),
-	COMPAT_SYSCALL_ENTRY_ALT(adjtimex, android_compat_adjtimex),
-	COMPAT_SYSCALL_ENTRY(brk),
-	COMPAT_SYSCALL_ENTRY(capget),
-	COMPAT_SYSCALL_ENTRY(capset),
-	COMPAT_SYSCALL_ENTRY(chdir),
-	COMPAT_SYSCALL_ENTRY(chmod),
-	COMPAT_SYSCALL_ENTRY_ALT(clock_adjtime, android_compat_clock_adjtime),
-	COMPAT_SYSCALL_ENTRY(clock_getres),
-	COMPAT_SYSCALL_ENTRY(clock_gettime),
-	COMPAT_SYSCALL_ENTRY(clock_nanosleep),
-	COMPAT_SYSCALL_ENTRY(clock_settime),
-	COMPAT_SYSCALL_ENTRY(clone),
-	COMPAT_SYSCALL_ENTRY(close),
-	COMPAT_SYSCALL_ENTRY(creat),
-	COMPAT_SYSCALL_ENTRY(dup),
-	COMPAT_SYSCALL_ENTRY(dup2),
-	COMPAT_SYSCALL_ENTRY(dup3),
-	COMPAT_SYSCALL_ENTRY(epoll_create),
-	COMPAT_SYSCALL_ENTRY(epoll_create1),
-	COMPAT_SYSCALL_ENTRY(epoll_ctl),
-	COMPAT_SYSCALL_ENTRY(epoll_wait),
-	COMPAT_SYSCALL_ENTRY(epoll_pwait),
-	COMPAT_SYSCALL_ENTRY(eventfd),
-	COMPAT_SYSCALL_ENTRY(eventfd2),
-	COMPAT_SYSCALL_ENTRY(execve),
-	COMPAT_SYSCALL_ENTRY(exit),
-	COMPAT_SYSCALL_ENTRY(exit_group),
-	COMPAT_SYSCALL_ENTRY(faccessat),
-	COMPAT_SYSCALL_ENTRY(fallocate),
-	COMPAT_SYSCALL_ENTRY(fchdir),
-	COMPAT_SYSCALL_ENTRY(fchmod),
-	COMPAT_SYSCALL_ENTRY(fchmodat),
-	COMPAT_SYSCALL_ENTRY(fchownat),
-	COMPAT_SYSCALL_ENTRY(fcntl),
-	COMPAT_SYSCALL_ENTRY(fdatasync),
-	COMPAT_SYSCALL_ENTRY(fgetxattr),
-	COMPAT_SYSCALL_ENTRY(flistxattr),
-	COMPAT_SYSCALL_ENTRY(flock),
-	COMPAT_SYSCALL_ENTRY(fork),
-	COMPAT_SYSCALL_ENTRY(fremovexattr),
-	COMPAT_SYSCALL_ENTRY(fsetxattr),
-	COMPAT_SYSCALL_ENTRY(fstat),
-	COMPAT_SYSCALL_ENTRY(fstatfs),
-	COMPAT_SYSCALL_ENTRY(fsync),
-	COMPAT_SYSCALL_ENTRY(ftruncate),
-	COMPAT_SYSCALL_ENTRY(futex),
-	COMPAT_SYSCALL_ENTRY(futimesat),
-	COMPAT_SYSCALL_ENTRY_ALT(getcpu, android_getcpu),
-	COMPAT_SYSCALL_ENTRY(getcwd),
-	COMPAT_SYSCALL_ENTRY(getdents),
-	COMPAT_SYSCALL_ENTRY(getdents64),
-	COMPAT_SYSCALL_ENTRY(getpgid),
-	COMPAT_SYSCALL_ENTRY(getpgrp),
-	COMPAT_SYSCALL_ENTRY(getpid),
-	COMPAT_SYSCALL_ENTRY(getppid),
-	COMPAT_SYSCALL_ENTRY_ALT(getpriority, android_getpriority),
-	COMPAT_SYSCALL_ENTRY(getrusage),
-	COMPAT_SYSCALL_ENTRY(getsid),
-	COMPAT_SYSCALL_ENTRY(gettid),
-	COMPAT_SYSCALL_ENTRY(gettimeofday),
-	COMPAT_SYSCALL_ENTRY(getxattr),
-	COMPAT_SYSCALL_ENTRY(inotify_add_watch),
-	COMPAT_SYSCALL_ENTRY(inotify_init),
-	COMPAT_SYSCALL_ENTRY(inotify_init1),
-	COMPAT_SYSCALL_ENTRY(inotify_rm_watch),
-	COMPAT_SYSCALL_ENTRY(ioctl),
-	COMPAT_SYSCALL_ENTRY(ioprio_set),
-	COMPAT_SYSCALL_ENTRY(kill),
-	COMPAT_SYSCALL_ENTRY(lgetxattr),
-	COMPAT_SYSCALL_ENTRY(link),
-	COMPAT_SYSCALL_ENTRY(linkat),
-	COMPAT_SYSCALL_ENTRY(listxattr),
-	COMPAT_SYSCALL_ENTRY(llistxattr),
-	COMPAT_SYSCALL_ENTRY(lremovexattr),
-	COMPAT_SYSCALL_ENTRY(lseek),
-	COMPAT_SYSCALL_ENTRY(lsetxattr),
-	COMPAT_SYSCALL_ENTRY(lstat),
-	COMPAT_SYSCALL_ENTRY(madvise),
-	COMPAT_SYSCALL_ENTRY(mincore),
-	COMPAT_SYSCALL_ENTRY(mkdir),
-	COMPAT_SYSCALL_ENTRY(mkdirat),
-	COMPAT_SYSCALL_ENTRY(mknod),
-	COMPAT_SYSCALL_ENTRY(mknodat),
-	COMPAT_SYSCALL_ENTRY(mlock),
-	COMPAT_SYSCALL_ENTRY(mlockall),
-	COMPAT_SYSCALL_ENTRY(munlock),
-	COMPAT_SYSCALL_ENTRY(munlockall),
-	COMPAT_SYSCALL_ENTRY(mount),
-	COMPAT_SYSCALL_ENTRY(mprotect),
-	COMPAT_SYSCALL_ENTRY(mremap),
-	COMPAT_SYSCALL_ENTRY(msync),
-	COMPAT_SYSCALL_ENTRY(munmap),
-	COMPAT_SYSCALL_ENTRY(name_to_handle_at),
-	COMPAT_SYSCALL_ENTRY(nanosleep),
-	COMPAT_SYSCALL_ENTRY(open),
-	COMPAT_SYSCALL_ENTRY(open_by_handle_at),
-	COMPAT_SYSCALL_ENTRY(openat),
-	COMPAT_SYSCALL_ENTRY_ALT(perf_event_open, android_perf_event_open),
-	COMPAT_SYSCALL_ENTRY(personality),
-	COMPAT_SYSCALL_ENTRY(pipe),
-	COMPAT_SYSCALL_ENTRY(pipe2),
-	COMPAT_SYSCALL_ENTRY(poll),
-	COMPAT_SYSCALL_ENTRY(ppoll),
-	COMPAT_SYSCALL_ENTRY_ALT(prctl, alt_sys_prctl),
-	COMPAT_SYSCALL_ENTRY(pread64),
-	COMPAT_SYSCALL_ENTRY(preadv),
-	COMPAT_SYSCALL_ENTRY(prlimit64),
-	COMPAT_SYSCALL_ENTRY(process_vm_readv),
-	COMPAT_SYSCALL_ENTRY(process_vm_writev),
-	COMPAT_SYSCALL_ENTRY(pselect6),
-	COMPAT_SYSCALL_ENTRY(ptrace),
-	COMPAT_SYSCALL_ENTRY(pwrite64),
-	COMPAT_SYSCALL_ENTRY(pwritev),
-	COMPAT_SYSCALL_ENTRY(read),
-	COMPAT_SYSCALL_ENTRY(readahead),
-	COMPAT_SYSCALL_ENTRY(readv),
-	COMPAT_SYSCALL_ENTRY(readlink),
-	COMPAT_SYSCALL_ENTRY(readlinkat),
-	COMPAT_SYSCALL_ENTRY(recvmmsg),
-	COMPAT_SYSCALL_ENTRY(remap_file_pages),
-	COMPAT_SYSCALL_ENTRY(removexattr),
-	COMPAT_SYSCALL_ENTRY(rename),
-	COMPAT_SYSCALL_ENTRY(renameat),
-	COMPAT_SYSCALL_ENTRY(restart_syscall),
-	COMPAT_SYSCALL_ENTRY(rmdir),
-	COMPAT_SYSCALL_ENTRY(rt_sigaction),
-	COMPAT_SYSCALL_ENTRY(rt_sigpending),
-	COMPAT_SYSCALL_ENTRY(rt_sigprocmask),
-	COMPAT_SYSCALL_ENTRY(rt_sigqueueinfo),
-	COMPAT_SYSCALL_ENTRY(rt_sigreturn),
-	COMPAT_SYSCALL_ENTRY(rt_sigsuspend),
-	COMPAT_SYSCALL_ENTRY(rt_sigtimedwait),
-	COMPAT_SYSCALL_ENTRY(rt_tgsigqueueinfo),
-	COMPAT_SYSCALL_ENTRY(sched_get_priority_max),
-	COMPAT_SYSCALL_ENTRY(sched_get_priority_min),
-	COMPAT_SYSCALL_ENTRY(sched_getaffinity),
-	COMPAT_SYSCALL_ENTRY(sched_getparam),
-	COMPAT_SYSCALL_ENTRY(sched_getscheduler),
-	COMPAT_SYSCALL_ENTRY(sched_setaffinity),
-	COMPAT_SYSCALL_ENTRY_ALT(sched_setscheduler,
-				 android_sched_setscheduler),
-	COMPAT_SYSCALL_ENTRY(sched_yield),
-	COMPAT_SYSCALL_ENTRY(seccomp),
-	COMPAT_SYSCALL_ENTRY(sendfile),
-	COMPAT_SYSCALL_ENTRY(sendfile64),
-	COMPAT_SYSCALL_ENTRY(sendmmsg),
-	COMPAT_SYSCALL_ENTRY(set_robust_list),
-	COMPAT_SYSCALL_ENTRY(set_tid_address),
-	COMPAT_SYSCALL_ENTRY(setitimer),
-	COMPAT_SYSCALL_ENTRY(setns),
-	COMPAT_SYSCALL_ENTRY(setpgid),
-	COMPAT_SYSCALL_ENTRY_ALT(setpriority, android_setpriority),
-	COMPAT_SYSCALL_ENTRY(setrlimit),
-	COMPAT_SYSCALL_ENTRY(setsid),
-	COMPAT_SYSCALL_ENTRY(settimeofday),
-	COMPAT_SYSCALL_ENTRY(setxattr),
-	COMPAT_SYSCALL_ENTRY(signalfd4),
-	COMPAT_SYSCALL_ENTRY(sigaltstack),
-	COMPAT_SYSCALL_ENTRY(splice),
-	COMPAT_SYSCALL_ENTRY(stat),
-	COMPAT_SYSCALL_ENTRY(statfs),
-	COMPAT_SYSCALL_ENTRY(symlink),
-	COMPAT_SYSCALL_ENTRY(symlinkat),
-	COMPAT_SYSCALL_ENTRY(sysinfo),
-	COMPAT_SYSCALL_ENTRY(syslog),
-	COMPAT_SYSCALL_ENTRY(tgkill),
-	COMPAT_SYSCALL_ENTRY(tee),
-	COMPAT_SYSCALL_ENTRY(tkill),
-	COMPAT_SYSCALL_ENTRY(time),
-	COMPAT_SYSCALL_ENTRY(timer_create),
-	COMPAT_SYSCALL_ENTRY(timer_delete),
-	COMPAT_SYSCALL_ENTRY(timer_gettime),
-	COMPAT_SYSCALL_ENTRY(timer_getoverrun),
-	COMPAT_SYSCALL_ENTRY(timer_settime),
-	COMPAT_SYSCALL_ENTRY(timerfd_create),
-	COMPAT_SYSCALL_ENTRY(timerfd_gettime),
-	COMPAT_SYSCALL_ENTRY(timerfd_settime),
-	COMPAT_SYSCALL_ENTRY(times),
-	COMPAT_SYSCALL_ENTRY(truncate),
-	COMPAT_SYSCALL_ENTRY(umask),
-	COMPAT_SYSCALL_ENTRY(umount2),
-	COMPAT_SYSCALL_ENTRY(uname),
-	COMPAT_SYSCALL_ENTRY(unlink),
-	COMPAT_SYSCALL_ENTRY(unlinkat),
-	COMPAT_SYSCALL_ENTRY(unshare),
-	COMPAT_SYSCALL_ENTRY(ustat),
-	COMPAT_SYSCALL_ENTRY(utimensat),
-	COMPAT_SYSCALL_ENTRY(utimes),
-	COMPAT_SYSCALL_ENTRY(vfork),
-	COMPAT_SYSCALL_ENTRY(vmsplice),
-	COMPAT_SYSCALL_ENTRY(wait4),
-	COMPAT_SYSCALL_ENTRY(waitid),
-	COMPAT_SYSCALL_ENTRY(write),
-	COMPAT_SYSCALL_ENTRY(writev),
-	COMPAT_SYSCALL_ENTRY(chown32),
-	COMPAT_SYSCALL_ENTRY(fchown32),
-	COMPAT_SYSCALL_ENTRY(fcntl64),
-	COMPAT_SYSCALL_ENTRY(fstat64),
-	COMPAT_SYSCALL_ENTRY(fstatat64),
-	COMPAT_SYSCALL_ENTRY(fstatfs64),
-	COMPAT_SYSCALL_ENTRY(ftruncate64),
-	COMPAT_SYSCALL_ENTRY(getegid),
-	COMPAT_SYSCALL_ENTRY(getegid32),
-	COMPAT_SYSCALL_ENTRY(geteuid),
-	COMPAT_SYSCALL_ENTRY(geteuid32),
-	COMPAT_SYSCALL_ENTRY(getgid),
-	COMPAT_SYSCALL_ENTRY(getgid32),
-	COMPAT_SYSCALL_ENTRY(getgroups32),
-	COMPAT_SYSCALL_ENTRY(getresgid32),
-	COMPAT_SYSCALL_ENTRY(getresuid32),
-	COMPAT_SYSCALL_ENTRY(getuid),
-	COMPAT_SYSCALL_ENTRY(getuid32),
-	COMPAT_SYSCALL_ENTRY(lchown32),
-	COMPAT_SYSCALL_ENTRY(lstat64),
-	COMPAT_SYSCALL_ENTRY(mmap2),
-	COMPAT_SYSCALL_ENTRY(_newselect),
-	COMPAT_SYSCALL_ENTRY(_llseek),
-	COMPAT_SYSCALL_ENTRY(sigaction),
-	COMPAT_SYSCALL_ENTRY(sigpending),
-	COMPAT_SYSCALL_ENTRY(sigprocmask),
-	COMPAT_SYSCALL_ENTRY(sigreturn),
-	COMPAT_SYSCALL_ENTRY(sigsuspend),
-	COMPAT_SYSCALL_ENTRY(setgid32),
-	COMPAT_SYSCALL_ENTRY(setgroups32),
-	COMPAT_SYSCALL_ENTRY(setregid32),
-	COMPAT_SYSCALL_ENTRY(setresgid32),
-	COMPAT_SYSCALL_ENTRY(setresuid32),
-	COMPAT_SYSCALL_ENTRY(setreuid32),
-	COMPAT_SYSCALL_ENTRY(setuid32),
-	COMPAT_SYSCALL_ENTRY(stat64),
-	COMPAT_SYSCALL_ENTRY(statfs64),
-	COMPAT_SYSCALL_ENTRY(truncate64),
-	COMPAT_SYSCALL_ENTRY(ugetrlimit),
-
-	/*
-	 * waitpid(2) is deprecated on most architectures, but still exists
-	 * on IA32.
-	 */
-#ifdef CONFIG_X86
-	COMPAT_SYSCALL_ENTRY(waitpid),
-#endif
-
-	/*
-	 * posix_fadvise(2) and sync_file_range(2) have ARM-specific wrappers
-	 * to deal with register alignment.
-	 */
-#ifdef CONFIG_ARM64
-	COMPAT_SYSCALL_ENTRY(arm_fadvise64_64),
-	COMPAT_SYSCALL_ENTRY(sync_file_range2),
-#else
-	COMPAT_SYSCALL_ENTRY(fadvise64_64),
-	COMPAT_SYSCALL_ENTRY(fadvise64),
-	COMPAT_SYSCALL_ENTRY(sync_file_range),
-#endif
-
-	/* IA32 uses the common socketcall(2) entrypoint for socket calls. */
-#ifdef CONFIG_X86
-	COMPAT_SYSCALL_ENTRY(socketcall),
-#else
-	COMPAT_SYSCALL_ENTRY(accept),
-	COMPAT_SYSCALL_ENTRY(accept4),
-	COMPAT_SYSCALL_ENTRY(bind),
-	COMPAT_SYSCALL_ENTRY(connect),
-	COMPAT_SYSCALL_ENTRY(getpeername),
-	COMPAT_SYSCALL_ENTRY(getsockname),
-	COMPAT_SYSCALL_ENTRY(getsockopt),
-	COMPAT_SYSCALL_ENTRY(listen),
-	COMPAT_SYSCALL_ENTRY(recvfrom),
-	COMPAT_SYSCALL_ENTRY(recvmsg),
-	COMPAT_SYSCALL_ENTRY(sendmsg),
-	COMPAT_SYSCALL_ENTRY(sendto),
-	COMPAT_SYSCALL_ENTRY(setsockopt),
-	COMPAT_SYSCALL_ENTRY(shutdown),
-	COMPAT_SYSCALL_ENTRY(socket),
-	COMPAT_SYSCALL_ENTRY(socketpair),
-	COMPAT_SYSCALL_ENTRY(recv),
-	COMPAT_SYSCALL_ENTRY(send),
-#endif
-
-	/*
-	 * getrlimit(2) is deprecated and not wired in the ARM compat table
-	 * on ARM64.
-	 */
-#ifndef CONFIG_ARM64
-	COMPAT_SYSCALL_ENTRY(getrlimit),
-#endif
-
-	/* x86-specific syscalls. */
-#ifdef CONFIG_X86
-	COMPAT_SYSCALL_ENTRY(modify_ldt),
-	COMPAT_SYSCALL_ENTRY(set_thread_area),
-#endif
-};
-
-static struct syscall_whitelist_entry third_party_compat_whitelist[] = {
-	COMPAT_SYSCALL_ENTRY(access),
-	COMPAT_SYSCALL_ENTRY(brk),
-	COMPAT_SYSCALL_ENTRY(chdir),
-	COMPAT_SYSCALL_ENTRY(clock_gettime),
-	COMPAT_SYSCALL_ENTRY(clone),
-	COMPAT_SYSCALL_ENTRY(close),
-	COMPAT_SYSCALL_ENTRY(creat),
-	COMPAT_SYSCALL_ENTRY(dup),
-	COMPAT_SYSCALL_ENTRY(dup2),
-	COMPAT_SYSCALL_ENTRY(execve),
-	COMPAT_SYSCALL_ENTRY(exit),
-	COMPAT_SYSCALL_ENTRY(exit_group),
-	COMPAT_SYSCALL_ENTRY(fcntl),
-	COMPAT_SYSCALL_ENTRY(fcntl64),
-	COMPAT_SYSCALL_ENTRY(fstat),
-	COMPAT_SYSCALL_ENTRY(fstat64),
-	COMPAT_SYSCALL_ENTRY(futex),
-	COMPAT_SYSCALL_ENTRY(getcwd),
-	COMPAT_SYSCALL_ENTRY(getdents),
-	COMPAT_SYSCALL_ENTRY(getdents64),
-	COMPAT_SYSCALL_ENTRY(getegid),
-	COMPAT_SYSCALL_ENTRY(geteuid),
-	COMPAT_SYSCALL_ENTRY(geteuid32),
-	COMPAT_SYSCALL_ENTRY(getgid),
-	COMPAT_SYSCALL_ENTRY(getpgid),
-	COMPAT_SYSCALL_ENTRY(getpgrp),
-	COMPAT_SYSCALL_ENTRY(getpid),
-	COMPAT_SYSCALL_ENTRY(getpriority),
-	COMPAT_SYSCALL_ENTRY(getppid),
-	COMPAT_SYSCALL_ENTRY(getsid),
-	COMPAT_SYSCALL_ENTRY(gettimeofday),
-	COMPAT_SYSCALL_ENTRY(getuid),
-	COMPAT_SYSCALL_ENTRY(getuid32),
-	COMPAT_SYSCALL_ENTRY(ioctl),
-	COMPAT_SYSCALL_ENTRY(_llseek),
-	COMPAT_SYSCALL_ENTRY(lseek),
-	COMPAT_SYSCALL_ENTRY(lstat),
-	COMPAT_SYSCALL_ENTRY(lstat64),
-	COMPAT_SYSCALL_ENTRY(madvise),
-	COMPAT_SYSCALL_ENTRY(mkdir),
-	COMPAT_SYSCALL_ENTRY(mmap2),
-	COMPAT_SYSCALL_ENTRY(mprotect),
-	COMPAT_SYSCALL_ENTRY(munmap),
-	COMPAT_SYSCALL_ENTRY(nanosleep),
-	COMPAT_SYSCALL_ENTRY(_newselect),
-	COMPAT_SYSCALL_ENTRY(open),
-	COMPAT_SYSCALL_ENTRY(openat),
-	COMPAT_SYSCALL_ENTRY(pipe),
-	COMPAT_SYSCALL_ENTRY(poll),
-	COMPAT_SYSCALL_ENTRY(prlimit64),
-	COMPAT_SYSCALL_ENTRY(read),
-	COMPAT_SYSCALL_ENTRY(readlink),
-	COMPAT_SYSCALL_ENTRY(rt_sigaction),
-	COMPAT_SYSCALL_ENTRY(rt_sigprocmask),
-	COMPAT_SYSCALL_ENTRY(rt_sigreturn),
-	COMPAT_SYSCALL_ENTRY(sendfile),
-	COMPAT_SYSCALL_ENTRY(set_robust_list),
-	COMPAT_SYSCALL_ENTRY(set_tid_address),
-	COMPAT_SYSCALL_ENTRY(setgid32),
-	COMPAT_SYSCALL_ENTRY(setuid32),
-	COMPAT_SYSCALL_ENTRY(setpgid),
-	COMPAT_SYSCALL_ENTRY(setpriority),
-	COMPAT_SYSCALL_ENTRY(setsid),
-	COMPAT_SYSCALL_ENTRY(stat),
-	COMPAT_SYSCALL_ENTRY(stat64),
-	COMPAT_SYSCALL_ENTRY(statfs),
-	COMPAT_SYSCALL_ENTRY(syslog),
-	COMPAT_SYSCALL_ENTRY(ugetrlimit),
-	COMPAT_SYSCALL_ENTRY(umask),
-	COMPAT_SYSCALL_ENTRY(uname),
-	COMPAT_SYSCALL_ENTRY(unlink),
-	COMPAT_SYSCALL_ENTRY(wait4),
-	COMPAT_SYSCALL_ENTRY(write),
-	COMPAT_SYSCALL_ENTRY(writev),
-
-	/* IA32 uses the common socketcall(2) entrypoint for socket calls. */
-#ifdef CONFIG_X86
-	COMPAT_SYSCALL_ENTRY(socketcall),
-#else
-	COMPAT_SYSCALL_ENTRY(accept),
-	COMPAT_SYSCALL_ENTRY(bind),
-	COMPAT_SYSCALL_ENTRY(connect),
-	COMPAT_SYSCALL_ENTRY(listen),
-	COMPAT_SYSCALL_ENTRY(recvfrom),
-	COMPAT_SYSCALL_ENTRY(recvmsg),
-	COMPAT_SYSCALL_ENTRY(sendmsg),
-	COMPAT_SYSCALL_ENTRY(sendto),
-	COMPAT_SYSCALL_ENTRY(setsockopt),
-	COMPAT_SYSCALL_ENTRY(socket),
-	COMPAT_SYSCALL_ENTRY(socketpair),
-#endif
-
-	/*
-	 * getrlimit(2) is deprecated and not wired in the ARM compat table
-	 * on ARM64.
-	 */
-#ifndef CONFIG_ARM64
-	COMPAT_SYSCALL_ENTRY(getrlimit),
-#endif
-
-	/* X86-specific syscalls. */
-#ifdef CONFIG_X86
-	SYSCALL_ENTRY(arch_prctl),
-#endif
-};
-#endif
+#endif /* CONFIG_COMPAT */
 
 static struct syscall_whitelist whitelists[] = {
 	SYSCALL_WHITELIST(read_write_test),
 	SYSCALL_WHITELIST(android),
 	PERMISSIVE_SYSCALL_WHITELIST(android),
 	SYSCALL_WHITELIST(third_party),
-	PERMISSIVE_SYSCALL_WHITELIST(third_party)
+	PERMISSIVE_SYSCALL_WHITELIST(third_party),
+	SYSCALL_WHITELIST(complete),
+	PERMISSIVE_SYSCALL_WHITELIST(complete)
 };
 
 static int alt_syscall_apply_whitelist(const struct syscall_whitelist *wl,
@@ -1580,7 +351,7 @@
 {
 	return 0;
 }
-#endif
+#endif /* CONFIG_COMPAT */
 
 static int alt_syscall_init_one(const struct syscall_whitelist *wl)
 {
diff --git a/security/chromiumos/alt-syscall.h b/security/chromiumos/alt-syscall.h
new file mode 100644
index 0000000..f22ceff
--- /dev/null
+++ b/security/chromiumos/alt-syscall.h
@@ -0,0 +1,375 @@
+/*
+ * Linux Security Module for Chromium OS
+ *
+ * Copyright 2018 Google LLC. All Rights Reserved
+ *
+ * Authors:
+ *      Micah Morton <mortonm@chromium.org>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef ALT_SYSCALL_H
+#define ALT_SYSCALL_H
+
+/*
+ * NOTE: this file uses the 'static' keyword for variable and function
+ * definitions because alt-syscall.c is the only .c file that is expected to
+ * include this header. Definitions were pulled out from alt-syscall.c into
+ * this header and the *_whitelists.h headers for the sake of readability.
+ */
+
+static int allow_devmode_syscalls;
+
+#ifdef CONFIG_SYSCTL
+static int zero;
+static int one = 1;
+
+static struct ctl_path chromiumos_sysctl_path[] = {
+        { .procname = "kernel", },
+        { .procname = "chromiumos", },
+        { .procname = "alt_syscall", },
+        { }
+};
+
+static struct ctl_table chromiumos_sysctl_table[] = {
+        {
+                .procname       = "allow_devmode_syscalls",
+                .data           = &allow_devmode_syscalls,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = proc_dointvec_minmax,
+                .extra1         = &zero,
+                .extra2         = &one,
+        },
+        { }
+};
+#endif
+
+struct syscall_whitelist_entry {
+        unsigned int nr;
+        sys_call_ptr_t alt;
+};
+
+struct syscall_whitelist {
+        const char *name;
+        const struct syscall_whitelist_entry *whitelist;
+        unsigned int nr_whitelist;
+#ifdef CONFIG_COMPAT
+        const struct syscall_whitelist_entry *compat_whitelist;
+        unsigned int nr_compat_whitelist;
+#endif
+        bool permissive;
+};
+
+static struct alt_sys_call_table default_table;
+
+#define SYSCALL_ENTRY_ALT(name, func)                                   \
+        {                                                               \
+                .nr = __NR_ ## name,                                    \
+                .alt = (sys_call_ptr_t)func,                            \
+        }
+#define SYSCALL_ENTRY(name) SYSCALL_ENTRY_ALT(name, NULL)
+#define COMPAT_SYSCALL_ENTRY_ALT(name, func)                            \
+        {                                                               \
+                .nr = __NR_compat_ ## name,                             \
+                .alt = (sys_call_ptr_t)func,                            \
+        }
+#define COMPAT_SYSCALL_ENTRY(name) COMPAT_SYSCALL_ENTRY_ALT(name, NULL)
+
+/*
+ * If an alt_syscall table allows prctl(), override it to prevent a process
+ * from changing its syscall table.
+ */
+static asmlinkage long alt_sys_prctl(int option, unsigned long arg2,
+                                     unsigned long arg3, unsigned long arg4,
+                                     unsigned long arg5);
+
+#ifdef CONFIG_COMPAT
+#define SYSCALL_WHITELIST_COMPAT(x)                                     \
+        .compat_whitelist = x ## _compat_whitelist,                     \
+        .nr_compat_whitelist = ARRAY_SIZE(x ## _compat_whitelist),
+#else
+#define SYSCALL_WHITELIST_COMPAT(x)
+#endif
+
+#define SYSCALL_WHITELIST(x)                                            \
+        {                                                               \
+                .name = #x,                                             \
+                .whitelist = x ## _whitelist,                           \
+                .nr_whitelist = ARRAY_SIZE(x ## _whitelist),            \
+                SYSCALL_WHITELIST_COMPAT(x)                             \
+        }
+
+#define PERMISSIVE_SYSCALL_WHITELIST(x)                                 \
+        {                                                               \
+                .name = #x "_permissive",                               \
+                .permissive = true,                                     \
+                .whitelist = x ## _whitelist,                           \
+                .nr_whitelist = ARRAY_SIZE(x ## _whitelist),            \
+                SYSCALL_WHITELIST_COMPAT(x)                             \
+        }
+
+#ifdef CONFIG_COMPAT
+#ifdef CONFIG_X86_64
+#define __NR_compat_access      __NR_ia32_access
+#define __NR_compat_adjtimex    __NR_ia32_adjtimex
+#define __NR_compat_brk __NR_ia32_brk
+#define __NR_compat_capget      __NR_ia32_capget
+#define __NR_compat_capset      __NR_ia32_capset
+#define __NR_compat_chdir       __NR_ia32_chdir
+#define __NR_compat_chmod       __NR_ia32_chmod
+#define __NR_compat_clock_adjtime       __NR_ia32_clock_adjtime
+#define __NR_compat_clock_getres        __NR_ia32_clock_getres
+#define __NR_compat_clock_gettime       __NR_ia32_clock_gettime
+#define __NR_compat_clock_nanosleep     __NR_ia32_clock_nanosleep
+#define __NR_compat_clock_settime       __NR_ia32_clock_settime
+#define __NR_compat_clone       __NR_ia32_clone
+#define __NR_compat_close       __NR_ia32_close
+#define __NR_compat_creat       __NR_ia32_creat
+#define __NR_compat_dup __NR_ia32_dup
+#define __NR_compat_dup2        __NR_ia32_dup2
+#define __NR_compat_dup3        __NR_ia32_dup3
+#define __NR_compat_epoll_create        __NR_ia32_epoll_create
+#define __NR_compat_epoll_create1       __NR_ia32_epoll_create1
+#define __NR_compat_epoll_ctl   __NR_ia32_epoll_ctl
+#define __NR_compat_epoll_wait  __NR_ia32_epoll_wait
+#define __NR_compat_epoll_pwait __NR_ia32_epoll_pwait
+#define __NR_compat_eventfd     __NR_ia32_eventfd
+#define __NR_compat_eventfd2    __NR_ia32_eventfd2
+#define __NR_compat_execve      __NR_ia32_execve
+#define __NR_compat_exit        __NR_ia32_exit
+#define __NR_compat_exit_group  __NR_ia32_exit_group
+#define __NR_compat_faccessat   __NR_ia32_faccessat
+#define __NR_compat_fallocate   __NR_ia32_fallocate
+#define __NR_compat_fchdir      __NR_ia32_fchdir
+#define __NR_compat_fchmod      __NR_ia32_fchmod
+#define __NR_compat_fchmodat    __NR_ia32_fchmodat
+#define __NR_compat_fchown      __NR_ia32_fchown
+#define __NR_compat_fchownat    __NR_ia32_fchownat
+#define __NR_compat_fcntl       __NR_ia32_fcntl
+#define __NR_compat_fdatasync   __NR_ia32_fdatasync
+#define __NR_compat_fgetxattr   __NR_ia32_fgetxattr
+#define __NR_compat_flistxattr  __NR_ia32_flistxattr
+#define __NR_compat_flock       __NR_ia32_flock
+#define __NR_compat_fork        __NR_ia32_fork
+#define __NR_compat_fremovexattr        __NR_ia32_fremovexattr
+#define __NR_compat_fsetxattr   __NR_ia32_fsetxattr
+#define __NR_compat_fstat       __NR_ia32_fstat
+#define __NR_compat_fstatfs     __NR_ia32_fstatfs
+#define __NR_compat_fsync       __NR_ia32_fsync
+#define __NR_compat_ftruncate   __NR_ia32_ftruncate
+#define __NR_compat_futex       __NR_ia32_futex
+#define __NR_compat_futimesat   __NR_ia32_futimesat
+#define __NR_compat_getcpu      __NR_ia32_getcpu
+#define __NR_compat_getcwd      __NR_ia32_getcwd
+#define __NR_compat_getdents    __NR_ia32_getdents
+#define __NR_compat_getdents64  __NR_ia32_getdents64
+#define __NR_compat_getegid     __NR_ia32_getegid
+#define __NR_compat_geteuid     __NR_ia32_geteuid
+#define __NR_compat_getgid      __NR_ia32_getgid
+#define __NR_compat_getgroups32 __NR_ia32_getgroups32
+#define __NR_compat_getpgid     __NR_ia32_getpgid
+#define __NR_compat_getpgrp     __NR_ia32_getpgrp
+#define __NR_compat_getpid      __NR_ia32_getpid
+#define __NR_compat_getppid     __NR_ia32_getppid
+#define __NR_compat_getpriority __NR_ia32_getpriority
+#define __NR_compat_getresgid   __NR_ia32_getresgid
+#define __NR_compat_getresuid   __NR_ia32_getresuid
+#define __NR_compat_getrlimit   __NR_ia32_getrlimit
+#define __NR_compat_getrusage   __NR_ia32_getrusage
+#define __NR_compat_getsid      __NR_ia32_getsid
+#define __NR_compat_gettid      __NR_ia32_gettid
+#define __NR_compat_gettimeofday        __NR_ia32_gettimeofday
+#define __NR_compat_getuid      __NR_ia32_getuid
+#define __NR_compat_getxattr    __NR_ia32_getxattr
+#define __NR_compat_inotify_add_watch   __NR_ia32_inotify_add_watch
+#define __NR_compat_inotify_init        __NR_ia32_inotify_init
+#define __NR_compat_inotify_init1       __NR_ia32_inotify_init1
+#define __NR_compat_inotify_rm_watch    __NR_ia32_inotify_rm_watch
+#define __NR_compat_ioctl       __NR_ia32_ioctl
+#define __NR_compat_ioprio_set  __NR_ia32_ioprio_set
+#define __NR_compat_kill        __NR_ia32_kill
+#define __NR_compat_lgetxattr   __NR_ia32_lgetxattr
+#define __NR_compat_link        __NR_ia32_link
+#define __NR_compat_linkat      __NR_ia32_linkat
+#define __NR_compat_listxattr   __NR_ia32_listxattr
+#define __NR_compat_llistxattr  __NR_ia32_llistxattr
+#define __NR_compat_lremovexattr        __NR_ia32_lremovexattr
+#define __NR_compat_lseek       __NR_ia32_lseek
+#define __NR_compat_lsetxattr   __NR_ia32_lsetxattr
+#define __NR_compat_lstat       __NR_ia32_lstat
+#define __NR_compat_madvise     __NR_ia32_madvise
+#define __NR_compat_mincore     __NR_ia32_mincore
+#define __NR_compat_mkdir       __NR_ia32_mkdir
+#define __NR_compat_mkdirat     __NR_ia32_mkdirat
+#define __NR_compat_mknod       __NR_ia32_mknod
+#define __NR_compat_mknodat     __NR_ia32_mknodat
+#define __NR_compat_mlock       __NR_ia32_mlock
+#define __NR_compat_munlock     __NR_ia32_munlock
+#define __NR_compat_mlockall    __NR_ia32_mlockall
+#define __NR_compat_munlockall  __NR_ia32_munlockall
+#define __NR_compat_modify_ldt  __NR_ia32_modify_ldt
+#define __NR_compat_mount       __NR_ia32_mount
+#define __NR_compat_mprotect    __NR_ia32_mprotect
+#define __NR_compat_mremap      __NR_ia32_mremap
+#define __NR_compat_msync       __NR_ia32_msync
+#define __NR_compat_munmap      __NR_ia32_munmap
+#define __NR_compat_name_to_handle_at   __NR_ia32_name_to_handle_at
+#define __NR_compat_nanosleep   __NR_ia32_nanosleep
+#define __NR_compat_open        __NR_ia32_open
+#define __NR_compat_open_by_handle_at   __NR_ia32_open_by_handle_at
+#define __NR_compat_openat      __NR_ia32_openat
+#define __NR_compat_perf_event_open     __NR_ia32_perf_event_open
+#define __NR_compat_personality __NR_ia32_personality
+#define __NR_compat_pipe        __NR_ia32_pipe
+#define __NR_compat_pipe2       __NR_ia32_pipe2
+#define __NR_compat_poll        __NR_ia32_poll
+#define __NR_compat_ppoll       __NR_ia32_ppoll
+#define __NR_compat_prctl       __NR_ia32_prctl
+#define __NR_compat_pread64     __NR_ia32_pread64
+#define __NR_compat_preadv      __NR_ia32_preadv
+#define __NR_compat_prlimit64   __NR_ia32_prlimit64
+#define __NR_compat_process_vm_readv    __NR_ia32_process_vm_readv
+#define __NR_compat_process_vm_writev   __NR_ia32_process_vm_writev
+#define __NR_compat_pselect6    __NR_ia32_pselect6
+#define __NR_compat_ptrace      __NR_ia32_ptrace
+#define __NR_compat_pwrite64    __NR_ia32_pwrite64
+#define __NR_compat_pwritev     __NR_ia32_pwritev
+#define __NR_compat_read        __NR_ia32_read
+#define __NR_compat_readahead   __NR_ia32_readahead
+#define __NR_compat_readv       __NR_ia32_readv
+#define __NR_compat_readlink    __NR_ia32_readlink
+#define __NR_compat_readlinkat  __NR_ia32_readlinkat
+#define __NR_compat_recvmmsg    __NR_ia32_recvmmsg
+#define __NR_compat_remap_file_pages    __NR_ia32_remap_file_pages
+#define __NR_compat_removexattr __NR_ia32_removexattr
+#define __NR_compat_rename      __NR_ia32_rename
+#define __NR_compat_renameat    __NR_ia32_renameat
+#define __NR_compat_restart_syscall     __NR_ia32_restart_syscall
+#define __NR_compat_rmdir       __NR_ia32_rmdir
+#define __NR_compat_rt_sigaction        __NR_ia32_rt_sigaction
+#define __NR_compat_rt_sigpending       __NR_ia32_rt_sigpending
+#define __NR_compat_rt_sigprocmask      __NR_ia32_rt_sigprocmask
+#define __NR_compat_rt_sigqueueinfo     __NR_ia32_rt_sigqueueinfo
+#define __NR_compat_rt_sigreturn        __NR_ia32_rt_sigreturn
+#define __NR_compat_rt_sigsuspend       __NR_ia32_rt_sigsuspend
+#define __NR_compat_rt_sigtimedwait     __NR_ia32_rt_sigtimedwait
+#define __NR_compat_rt_tgsigqueueinfo   __NR_ia32_rt_tgsigqueueinfo
+#define __NR_compat_sched_get_priority_max      __NR_ia32_sched_get_priority_max
+#define __NR_compat_sched_get_priority_min      __NR_ia32_sched_get_priority_min
+#define __NR_compat_sched_getaffinity   __NR_ia32_sched_getaffinity
+#define __NR_compat_sched_getparam      __NR_ia32_sched_getparam
+#define __NR_compat_sched_getscheduler  __NR_ia32_sched_getscheduler
+#define __NR_compat_sched_setaffinity   __NR_ia32_sched_setaffinity
+#define __NR_compat_sched_setscheduler  __NR_ia32_sched_setscheduler
+#define __NR_compat_sched_yield __NR_ia32_sched_yield
+#define __NR_compat_seccomp     __NR_ia32_seccomp
+#define __NR_compat_sendfile    __NR_ia32_sendfile
+#define __NR_compat_sendfile64  __NR_ia32_sendfile64
+#define __NR_compat_sendmmsg    __NR_ia32_sendmmsg
+#define __NR_compat_set_robust_list     __NR_ia32_set_robust_list
+#define __NR_compat_set_tid_address     __NR_ia32_set_tid_address
+#define __NR_compat_set_thread_area     __NR_ia32_set_thread_area
+#define __NR_compat_setgid      __NR_ia32_setgid
+#define __NR_compat_setgroups   __NR_ia32_setgroups
+#define __NR_compat_setitimer   __NR_ia32_setitimer
+#define __NR_compat_setns       __NR_ia32_setns
+#define __NR_compat_setpgid     __NR_ia32_setpgid
+#define __NR_compat_setpriority __NR_ia32_setpriority
+#define __NR_compat_setregid    __NR_ia32_setregid
+#define __NR_compat_setresgid   __NR_ia32_setresgid
+#define __NR_compat_setresuid   __NR_ia32_setresuid
+#define __NR_compat_setrlimit   __NR_ia32_setrlimit
+#define __NR_compat_setsid      __NR_ia32_setsid
+#define __NR_compat_settimeofday        __NR_ia32_settimeofday
+#define __NR_compat_setuid      __NR_ia32_setuid
+#define __NR_compat_setxattr    __NR_ia32_setxattr
+#define __NR_compat_signalfd4   __NR_ia32_signalfd4
+#define __NR_compat_sigaltstack __NR_ia32_sigaltstack
+#define __NR_compat_socketcall  __NR_ia32_socketcall
+#define __NR_compat_splice      __NR_ia32_splice
+#define __NR_compat_stat        __NR_ia32_stat
+#define __NR_compat_statfs      __NR_ia32_statfs
+#define __NR_compat_symlink     __NR_ia32_symlink
+#define __NR_compat_symlinkat   __NR_ia32_symlinkat
+#define __NR_compat_sync_file_range     __NR_ia32_sync_file_range
+#define __NR_compat_sysinfo     __NR_ia32_sysinfo
+#define __NR_compat_syslog      __NR_ia32_syslog
+#define __NR_compat_tee         __NR_ia32_tee
+#define __NR_compat_tgkill      __NR_ia32_tgkill
+#define __NR_compat_tkill       __NR_ia32_tkill
+#define __NR_compat_time        __NR_ia32_time
+#define __NR_compat_timer_create        __NR_ia32_timer_create
+#define __NR_compat_timer_delete        __NR_ia32_timer_delete
+#define __NR_compat_timer_getoverrun    __NR_ia32_timer_getoverrun
+#define __NR_compat_timer_gettime       __NR_ia32_timer_gettime
+#define __NR_compat_timer_settime       __NR_ia32_timer_settime
+#define __NR_compat_timerfd_create      __NR_ia32_timerfd_create
+#define __NR_compat_timerfd_gettime     __NR_ia32_timerfd_gettime
+#define __NR_compat_timerfd_settime     __NR_ia32_timerfd_settime
+#define __NR_compat_times               __NR_ia32_times
+#define __NR_compat_truncate    __NR_ia32_truncate
+#define __NR_compat_umask       __NR_ia32_umask
+#define __NR_compat_umount2     __NR_ia32_umount2
+#define __NR_compat_uname       __NR_ia32_uname
+#define __NR_compat_unlink      __NR_ia32_unlink
+#define __NR_compat_unlinkat    __NR_ia32_unlinkat
+#define __NR_compat_unshare     __NR_ia32_unshare
+#define __NR_compat_ustat       __NR_ia32_ustat
+#define __NR_compat_utimensat   __NR_ia32_utimensat
+#define __NR_compat_utimes      __NR_ia32_utimes
+#define __NR_compat_vfork       __NR_ia32_vfork
+#define __NR_compat_vmsplice    __NR_ia32_vmsplice
+#define __NR_compat_wait4       __NR_ia32_wait4
+#define __NR_compat_waitid      __NR_ia32_waitid
+#define __NR_compat_waitpid     __NR_ia32_waitpid
+#define __NR_compat_write       __NR_ia32_write
+#define __NR_compat_writev      __NR_ia32_writev
+#define __NR_compat_chown32     __NR_ia32_chown32
+#define __NR_compat_fadvise64   __NR_ia32_fadvise64
+#define __NR_compat_fadvise64_64        __NR_ia32_fadvise64_64
+#define __NR_compat_fchown32    __NR_ia32_fchown32
+#define __NR_compat_fcntl64     __NR_ia32_fcntl64
+#define __NR_compat_fstat64     __NR_ia32_fstat64
+#define __NR_compat_fstatat64   __NR_ia32_fstatat64
+#define __NR_compat_fstatfs64   __NR_ia32_fstatfs64
+#define __NR_compat_ftruncate64 __NR_ia32_ftruncate64
+#define __NR_compat_getegid32   __NR_ia32_getegid32
+#define __NR_compat_geteuid32   __NR_ia32_geteuid32
+#define __NR_compat_getgid32    __NR_ia32_getgid32
+#define __NR_compat_getresgid32 __NR_ia32_getresgid32
+#define __NR_compat_getresuid32 __NR_ia32_getresuid32
+#define __NR_compat_getuid32    __NR_ia32_getuid32
+#define __NR_compat_lchown32    __NR_ia32_lchown32
+#define __NR_compat_lstat64     __NR_ia32_lstat64
+#define __NR_compat_mmap2       __NR_ia32_mmap2
+#define __NR_compat__newselect  __NR_ia32__newselect
+#define __NR_compat__llseek     __NR_ia32__llseek
+#define __NR_compat_sigaction   __NR_ia32_sigaction
+#define __NR_compat_sigpending  __NR_ia32_sigpending
+#define __NR_compat_sigprocmask __NR_ia32_sigprocmask
+#define __NR_compat_sigreturn   __NR_ia32_sigreturn
+#define __NR_compat_sigsuspend  __NR_ia32_sigsuspend
+#define __NR_compat_setgid32    __NR_ia32_setgid32
+#define __NR_compat_setgroups32 __NR_ia32_setgroups32
+#define __NR_compat_setregid32  __NR_ia32_setregid32
+#define __NR_compat_setresgid32 __NR_ia32_setresgid32
+#define __NR_compat_setresuid32 __NR_ia32_setresuid32
+#define __NR_compat_setreuid32  __NR_ia32_setreuid32
+#define __NR_compat_setuid32    __NR_ia32_setuid32
+#define __NR_compat_stat64      __NR_ia32_stat64
+#define __NR_compat_statfs64    __NR_ia32_statfs64
+#define __NR_compat_truncate64  __NR_ia32_truncate64
+#define __NR_compat_ugetrlimit  __NR_ia32_ugetrlimit
+#endif
+#endif
+
+#endif /* ALT_SYSCALL_H */
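A minimal sketch of how the whitelist macros above expand, assuming a hypothetical table named "example" (the identifiers example_whitelist and example_compat_whitelist exist only for this illustration and are not part of the patch). It also shows how the tables override prctl() through alt_sys_prctl so a whitelisted process cannot change its own syscall table:

static struct syscall_whitelist_entry example_whitelist[] = {
	SYSCALL_ENTRY(read),	/* { .nr = __NR_read, .alt = NULL } */
	SYSCALL_ENTRY_ALT(prctl, alt_sys_prctl),
		/* { .nr = __NR_prctl, .alt = (sys_call_ptr_t)alt_sys_prctl } */
};

/*
 * SYSCALL_WHITELIST(example) then yields:
 *   { .name = "example",
 *     .whitelist = example_whitelist,
 *     .nr_whitelist = ARRAY_SIZE(example_whitelist),
 *     .compat_whitelist = example_compat_whitelist,      (CONFIG_COMPAT only)
 *     .nr_compat_whitelist = ARRAY_SIZE(example_compat_whitelist) }
 * while PERMISSIVE_SYSCALL_WHITELIST(example) produces the same entry with
 * .name = "example_permissive" and .permissive = true.
 */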
diff --git a/security/chromiumos/android_whitelists.h b/security/chromiumos/android_whitelists.h
new file mode 100644
index 0000000..809410d
--- /dev/null
+++ b/security/chromiumos/android_whitelists.h
@@ -0,0 +1,696 @@
+/*
+ * Linux Security Module for Chromium OS
+ *
+ * Copyright 2018 Google LLC. All Rights Reserved
+ *
+ * Authors:
+ *      Micah Morton <mortonm@chromium.org>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef ANDROID_WHITELISTS_H
+#define ANDROID_WHITELISTS_H
+
+/*
+ * NOTE: the purpose of this header is only to pull out the definition of this
+ * array from alt-syscall.c for the purposes of readability. It should not be
+ * included in other .c files.
+ */
+
+#include "alt-syscall.h"
+
+/*
+ * Syscall overrides for android.
+ */
+
+/*
+ * Reflect the priority adjustment done by android_setpriority.
+ * Note that the prio returned by getpriority has been offset by 20
+ * (i.e. it returns 40..1 instead of -20..19).
+ */
+static asmlinkage long android_getpriority(int which, int who);
+/* Make sure nothing sets a nice value more favorable than -10. */
+static asmlinkage long android_setpriority(int which, int who, int niceval);
+static asmlinkage long
+android_sched_setscheduler(pid_t pid, int policy,
+                           struct sched_param __user *param);
+static asmlinkage long
+android_perf_event_open(struct perf_event_attr __user *attr_uptr,
+                        pid_t pid, int cpu, int group_fd, unsigned long flags);
+static asmlinkage long android_adjtimex(struct timex __user *buf);
+static asmlinkage long android_clock_adjtime(const clockid_t which_clock,
+                                             struct timex __user *buf);
+static asmlinkage long android_getcpu(unsigned __user *cpu,
+                                      unsigned __user *node,
+                                      struct getcpu_cache __user *tcache);
+#ifdef CONFIG_COMPAT
+static asmlinkage long android_compat_adjtimex(struct compat_timex __user *buf);
+static asmlinkage long
+android_compat_clock_adjtime(const clockid_t which_clock,
+                             struct compat_timex __user *buf);
+#endif /* CONFIG_COMPAT */
+
+static struct syscall_whitelist_entry android_whitelist[] = {
+	SYSCALL_ENTRY_ALT(adjtimex, android_adjtimex),
+	SYSCALL_ENTRY(brk),
+	SYSCALL_ENTRY(capget),
+	SYSCALL_ENTRY(capset),
+	SYSCALL_ENTRY(chdir),
+	SYSCALL_ENTRY_ALT(clock_adjtime, android_clock_adjtime),
+	SYSCALL_ENTRY(clock_getres),
+	SYSCALL_ENTRY(clock_gettime),
+	SYSCALL_ENTRY(clock_nanosleep),
+	SYSCALL_ENTRY(clock_settime),
+	SYSCALL_ENTRY(clone),
+	SYSCALL_ENTRY(close),
+	SYSCALL_ENTRY(dup),
+	SYSCALL_ENTRY(dup3),
+	SYSCALL_ENTRY(epoll_create1),
+	SYSCALL_ENTRY(epoll_ctl),
+	SYSCALL_ENTRY(epoll_pwait),
+	SYSCALL_ENTRY(eventfd2),
+	SYSCALL_ENTRY(execve),
+	SYSCALL_ENTRY(exit),
+	SYSCALL_ENTRY(exit_group),
+	SYSCALL_ENTRY(faccessat),
+	SYSCALL_ENTRY(fallocate),
+	SYSCALL_ENTRY(fchdir),
+	SYSCALL_ENTRY(fchmod),
+	SYSCALL_ENTRY(fchmodat),
+	SYSCALL_ENTRY(fchownat),
+	SYSCALL_ENTRY(fcntl),
+	SYSCALL_ENTRY(fdatasync),
+	SYSCALL_ENTRY(fgetxattr),
+	SYSCALL_ENTRY(flistxattr),
+	SYSCALL_ENTRY(flock),
+	SYSCALL_ENTRY(fremovexattr),
+	SYSCALL_ENTRY(fsetxattr),
+	SYSCALL_ENTRY(fstat),
+	SYSCALL_ENTRY(fstatfs),
+	SYSCALL_ENTRY(fsync),
+	SYSCALL_ENTRY(ftruncate),
+	SYSCALL_ENTRY(futex),
+	SYSCALL_ENTRY_ALT(getcpu, android_getcpu),
+	SYSCALL_ENTRY(getcwd),
+	SYSCALL_ENTRY(getdents64),
+	SYSCALL_ENTRY(getpgid),
+	SYSCALL_ENTRY(getpid),
+	SYSCALL_ENTRY(getppid),
+	SYSCALL_ENTRY_ALT(getpriority, android_getpriority),
+	SYSCALL_ENTRY(getrlimit),
+	SYSCALL_ENTRY(getrusage),
+	SYSCALL_ENTRY(getsid),
+	SYSCALL_ENTRY(gettid),
+	SYSCALL_ENTRY(gettimeofday),
+	SYSCALL_ENTRY(getxattr),
+	SYSCALL_ENTRY(inotify_add_watch),
+	SYSCALL_ENTRY(inotify_init1),
+	SYSCALL_ENTRY(inotify_rm_watch),
+	SYSCALL_ENTRY(ioctl),
+	SYSCALL_ENTRY(ioprio_set),
+	SYSCALL_ENTRY(kill),
+	SYSCALL_ENTRY(lgetxattr),
+	SYSCALL_ENTRY(linkat),
+	SYSCALL_ENTRY(listxattr),
+	SYSCALL_ENTRY(llistxattr),
+	SYSCALL_ENTRY(lremovexattr),
+	SYSCALL_ENTRY(lseek),
+	SYSCALL_ENTRY(lsetxattr),
+	SYSCALL_ENTRY(madvise),
+	SYSCALL_ENTRY(mincore),
+	SYSCALL_ENTRY(mkdirat),
+	SYSCALL_ENTRY(mknodat),
+	SYSCALL_ENTRY(mlock),
+	SYSCALL_ENTRY(mlockall),
+	SYSCALL_ENTRY(munlock),
+	SYSCALL_ENTRY(munlockall),
+	SYSCALL_ENTRY(mount),
+	SYSCALL_ENTRY(mprotect),
+	SYSCALL_ENTRY(mremap),
+	SYSCALL_ENTRY(msync),
+	SYSCALL_ENTRY(munmap),
+	SYSCALL_ENTRY(name_to_handle_at),
+	SYSCALL_ENTRY(nanosleep),
+	SYSCALL_ENTRY(open_by_handle_at),
+	SYSCALL_ENTRY(openat),
+	SYSCALL_ENTRY_ALT(perf_event_open, android_perf_event_open),
+	SYSCALL_ENTRY(personality),
+	SYSCALL_ENTRY(pipe2),
+	SYSCALL_ENTRY(ppoll),
+	SYSCALL_ENTRY_ALT(prctl, alt_sys_prctl),
+	SYSCALL_ENTRY(pread64),
+	SYSCALL_ENTRY(preadv),
+	SYSCALL_ENTRY(prlimit64),
+	SYSCALL_ENTRY(process_vm_readv),
+	SYSCALL_ENTRY(process_vm_writev),
+	SYSCALL_ENTRY(pselect6),
+	SYSCALL_ENTRY(ptrace),
+	SYSCALL_ENTRY(pwrite64),
+	SYSCALL_ENTRY(pwritev),
+	SYSCALL_ENTRY(read),
+	SYSCALL_ENTRY(readahead),
+	SYSCALL_ENTRY(readv),
+	SYSCALL_ENTRY(readlinkat),
+	SYSCALL_ENTRY(recvmmsg),
+	SYSCALL_ENTRY(remap_file_pages),
+	SYSCALL_ENTRY(removexattr),
+	SYSCALL_ENTRY(renameat),
+	SYSCALL_ENTRY(restart_syscall),
+	SYSCALL_ENTRY(rt_sigaction),
+	SYSCALL_ENTRY(rt_sigpending),
+	SYSCALL_ENTRY(rt_sigprocmask),
+	SYSCALL_ENTRY(rt_sigqueueinfo),
+	SYSCALL_ENTRY(rt_sigreturn),
+	SYSCALL_ENTRY(rt_sigsuspend),
+	SYSCALL_ENTRY(rt_sigtimedwait),
+	SYSCALL_ENTRY(rt_tgsigqueueinfo),
+	SYSCALL_ENTRY(sched_get_priority_max),
+	SYSCALL_ENTRY(sched_get_priority_min),
+	SYSCALL_ENTRY(sched_getaffinity),
+	SYSCALL_ENTRY(sched_getparam),
+	SYSCALL_ENTRY(sched_getscheduler),
+	SYSCALL_ENTRY(sched_setaffinity),
+	SYSCALL_ENTRY_ALT(sched_setscheduler, android_sched_setscheduler),
+	SYSCALL_ENTRY(sched_yield),
+	SYSCALL_ENTRY(seccomp),
+	SYSCALL_ENTRY(sendfile),
+	SYSCALL_ENTRY(sendmmsg),
+	SYSCALL_ENTRY(set_robust_list),
+	SYSCALL_ENTRY(set_tid_address),
+	SYSCALL_ENTRY(setitimer),
+	SYSCALL_ENTRY(setns),
+	SYSCALL_ENTRY(setpgid),
+	SYSCALL_ENTRY_ALT(setpriority, android_setpriority),
+	SYSCALL_ENTRY(setrlimit),
+	SYSCALL_ENTRY(setsid),
+	SYSCALL_ENTRY(settimeofday),
+	SYSCALL_ENTRY(setxattr),
+	SYSCALL_ENTRY(signalfd4),
+	SYSCALL_ENTRY(sigaltstack),
+	SYSCALL_ENTRY(splice),
+	SYSCALL_ENTRY(statfs),
+	SYSCALL_ENTRY(symlinkat),
+	SYSCALL_ENTRY(sysinfo),
+	SYSCALL_ENTRY(syslog),
+	SYSCALL_ENTRY(tee),
+	SYSCALL_ENTRY(tgkill),
+	SYSCALL_ENTRY(tkill),
+	SYSCALL_ENTRY(time),
+	SYSCALL_ENTRY(timer_create),
+	SYSCALL_ENTRY(timer_delete),
+	SYSCALL_ENTRY(timer_gettime),
+	SYSCALL_ENTRY(timer_getoverrun),
+	SYSCALL_ENTRY(timer_settime),
+	SYSCALL_ENTRY(timerfd_create),
+	SYSCALL_ENTRY(timerfd_gettime),
+	SYSCALL_ENTRY(timerfd_settime),
+	SYSCALL_ENTRY(times),
+	SYSCALL_ENTRY(truncate),
+	SYSCALL_ENTRY(umask),
+	SYSCALL_ENTRY(umount2),
+	SYSCALL_ENTRY(uname),
+	SYSCALL_ENTRY(unlinkat),
+	SYSCALL_ENTRY(unshare),
+	SYSCALL_ENTRY(utimensat),
+	SYSCALL_ENTRY(vmsplice),
+	SYSCALL_ENTRY(wait4),
+	SYSCALL_ENTRY(waitid),
+	SYSCALL_ENTRY(write),
+	SYSCALL_ENTRY(writev),
+
+	/*
+	 * Deprecated syscalls which are not wired up on new architectures
+	 * such as ARM64.
+	 */
+#ifndef CONFIG_ARM64
+	SYSCALL_ENTRY(access),
+	SYSCALL_ENTRY(chmod),
+	SYSCALL_ENTRY(open),
+	SYSCALL_ENTRY(creat),
+	SYSCALL_ENTRY(dup2),
+	SYSCALL_ENTRY(epoll_create),
+	SYSCALL_ENTRY(epoll_wait),
+	SYSCALL_ENTRY(eventfd),
+	SYSCALL_ENTRY(fork),
+	SYSCALL_ENTRY(futimesat),
+	SYSCALL_ENTRY(getdents),
+	SYSCALL_ENTRY(getpgrp),
+	SYSCALL_ENTRY(inotify_init),
+	SYSCALL_ENTRY(link),
+	SYSCALL_ENTRY(lstat),
+	SYSCALL_ENTRY(mkdir),
+	SYSCALL_ENTRY(mknod),
+	SYSCALL_ENTRY(pipe),
+	SYSCALL_ENTRY(poll),
+	SYSCALL_ENTRY(readlink),
+	SYSCALL_ENTRY(rename),
+	SYSCALL_ENTRY(rmdir),
+	SYSCALL_ENTRY(stat),
+	SYSCALL_ENTRY(symlink),
+	SYSCALL_ENTRY(unlink),
+	SYSCALL_ENTRY(ustat),
+	SYSCALL_ENTRY(utimes),
+	SYSCALL_ENTRY(vfork),
+#endif
+
+	/*
+	 * waitpid(2) is deprecated on most architectures, but still exists
+	 * on IA32.
+	 */
+#ifdef CONFIG_X86_32
+	SYSCALL_ENTRY(waitpid),
+#endif
+
+	/* IA32 uses the common socketcall(2) entrypoint for socket calls. */
+#ifdef CONFIG_X86_32
+	SYSCALL_ENTRY(socketcall),
+#else
+	SYSCALL_ENTRY(accept),
+	SYSCALL_ENTRY(accept4),
+	SYSCALL_ENTRY(bind),
+	SYSCALL_ENTRY(connect),
+	SYSCALL_ENTRY(getpeername),
+	SYSCALL_ENTRY(getsockname),
+	SYSCALL_ENTRY(getsockopt),
+	SYSCALL_ENTRY(listen),
+	SYSCALL_ENTRY(recvfrom),
+	SYSCALL_ENTRY(recvmsg),
+	SYSCALL_ENTRY(sendmsg),
+	SYSCALL_ENTRY(sendto),
+	SYSCALL_ENTRY(setsockopt),
+	SYSCALL_ENTRY(shutdown),
+	SYSCALL_ENTRY(socket),
+	SYSCALL_ENTRY(socketpair),
+	/*
+	 * recv(2)/send(2) are officially deprecated, but their entry-points
+	 * still exist on ARM.
+	 */
+#ifdef CONFIG_ARM
+	SYSCALL_ENTRY(recv),
+	SYSCALL_ENTRY(send),
+#endif
+#endif
+
+	/*
+	 * posix_fadvise(2) and sync_file_range(2) have ARM-specific wrappers
+	 * to deal with register alignment.
+	 */
+#ifdef CONFIG_ARM
+	SYSCALL_ENTRY(arm_fadvise64_64),
+	SYSCALL_ENTRY(sync_file_range2),
+#else
+#ifdef CONFIG_X86_32
+	SYSCALL_ENTRY(fadvise64_64),
+#endif
+	SYSCALL_ENTRY(fadvise64),
+	SYSCALL_ENTRY(sync_file_range),
+#endif
+
+	/* 64-bit only syscalls. */
+#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
+	SYSCALL_ENTRY(fchown),
+	SYSCALL_ENTRY(getegid),
+	SYSCALL_ENTRY(geteuid),
+	SYSCALL_ENTRY(getgid),
+	SYSCALL_ENTRY(getgroups),
+	SYSCALL_ENTRY(getresgid),
+	SYSCALL_ENTRY(getresuid),
+	SYSCALL_ENTRY(getuid),
+	SYSCALL_ENTRY(newfstatat),
+	SYSCALL_ENTRY(mmap),
+	SYSCALL_ENTRY(setgid),
+	SYSCALL_ENTRY(setgroups),
+	SYSCALL_ENTRY(setregid),
+	SYSCALL_ENTRY(setresgid),
+	SYSCALL_ENTRY(setresuid),
+	SYSCALL_ENTRY(setreuid),
+	SYSCALL_ENTRY(setuid),
+	/*
+	 * chown(2), lchown(2), and select(2) are deprecated and not wired up
+	 * on ARM64.
+	 */
+#ifndef CONFIG_ARM64
+	SYSCALL_ENTRY(chown),
+	SYSCALL_ENTRY(lchown),
+	SYSCALL_ENTRY(select),
+#endif
+#endif
+
+	/* 32-bit only syscalls. */
+#if defined(CONFIG_ARM) || defined(CONFIG_X86_32)
+	SYSCALL_ENTRY(chown32),
+	SYSCALL_ENTRY(fchown32),
+	SYSCALL_ENTRY(fcntl64),
+	SYSCALL_ENTRY(fstat64),
+	SYSCALL_ENTRY(fstatat64),
+	SYSCALL_ENTRY(fstatfs64),
+	SYSCALL_ENTRY(ftruncate64),
+	SYSCALL_ENTRY(getegid32),
+	SYSCALL_ENTRY(geteuid32),
+	SYSCALL_ENTRY(getgid32),
+	SYSCALL_ENTRY(getgroups32),
+	SYSCALL_ENTRY(getresgid32),
+	SYSCALL_ENTRY(getresuid32),
+	SYSCALL_ENTRY(getuid32),
+	SYSCALL_ENTRY(lchown32),
+	SYSCALL_ENTRY(lstat64),
+	SYSCALL_ENTRY(mmap2),
+	SYSCALL_ENTRY(_newselect),
+	SYSCALL_ENTRY(_llseek),
+	SYSCALL_ENTRY(sigaction),
+	SYSCALL_ENTRY(sigpending),
+	SYSCALL_ENTRY(sigprocmask),
+	SYSCALL_ENTRY(sigreturn),
+	SYSCALL_ENTRY(sigsuspend),
+	SYSCALL_ENTRY(sendfile64),
+	SYSCALL_ENTRY(setgid32),
+	SYSCALL_ENTRY(setgroups32),
+	SYSCALL_ENTRY(setregid32),
+	SYSCALL_ENTRY(setresgid32),
+	SYSCALL_ENTRY(setresuid32),
+	SYSCALL_ENTRY(setreuid32),
+	SYSCALL_ENTRY(setuid32),
+	SYSCALL_ENTRY(stat64),
+	SYSCALL_ENTRY(statfs64),
+	SYSCALL_ENTRY(truncate64),
+	SYSCALL_ENTRY(ugetrlimit),
+#endif
+
+	/* X86-specific syscalls. */
+#ifdef CONFIG_X86
+	SYSCALL_ENTRY(modify_ldt),
+	SYSCALL_ENTRY(set_thread_area),
+#endif
+
+#ifdef CONFIG_X86_64
+	SYSCALL_ENTRY(arch_prctl),
+#endif
+}; /* end android_whitelist */
+
+#ifdef CONFIG_COMPAT
+static struct syscall_whitelist_entry android_compat_whitelist[] = {
+	COMPAT_SYSCALL_ENTRY(access),
+	COMPAT_SYSCALL_ENTRY_ALT(adjtimex, android_compat_adjtimex),
+	COMPAT_SYSCALL_ENTRY(brk),
+	COMPAT_SYSCALL_ENTRY(capget),
+	COMPAT_SYSCALL_ENTRY(capset),
+	COMPAT_SYSCALL_ENTRY(chdir),
+	COMPAT_SYSCALL_ENTRY(chmod),
+	COMPAT_SYSCALL_ENTRY_ALT(clock_adjtime, android_compat_clock_adjtime),
+	COMPAT_SYSCALL_ENTRY(clock_getres),
+	COMPAT_SYSCALL_ENTRY(clock_gettime),
+	COMPAT_SYSCALL_ENTRY(clock_nanosleep),
+	COMPAT_SYSCALL_ENTRY(clock_settime),
+	COMPAT_SYSCALL_ENTRY(clone),
+	COMPAT_SYSCALL_ENTRY(close),
+	COMPAT_SYSCALL_ENTRY(creat),
+	COMPAT_SYSCALL_ENTRY(dup),
+	COMPAT_SYSCALL_ENTRY(dup2),
+	COMPAT_SYSCALL_ENTRY(dup3),
+	COMPAT_SYSCALL_ENTRY(epoll_create),
+	COMPAT_SYSCALL_ENTRY(epoll_create1),
+	COMPAT_SYSCALL_ENTRY(epoll_ctl),
+	COMPAT_SYSCALL_ENTRY(epoll_wait),
+	COMPAT_SYSCALL_ENTRY(epoll_pwait),
+	COMPAT_SYSCALL_ENTRY(eventfd),
+	COMPAT_SYSCALL_ENTRY(eventfd2),
+	COMPAT_SYSCALL_ENTRY(execve),
+	COMPAT_SYSCALL_ENTRY(exit),
+	COMPAT_SYSCALL_ENTRY(exit_group),
+	COMPAT_SYSCALL_ENTRY(faccessat),
+	COMPAT_SYSCALL_ENTRY(fallocate),
+	COMPAT_SYSCALL_ENTRY(fchdir),
+	COMPAT_SYSCALL_ENTRY(fchmod),
+	COMPAT_SYSCALL_ENTRY(fchmodat),
+	COMPAT_SYSCALL_ENTRY(fchownat),
+	COMPAT_SYSCALL_ENTRY(fcntl),
+	COMPAT_SYSCALL_ENTRY(fdatasync),
+	COMPAT_SYSCALL_ENTRY(fgetxattr),
+	COMPAT_SYSCALL_ENTRY(flistxattr),
+	COMPAT_SYSCALL_ENTRY(flock),
+	COMPAT_SYSCALL_ENTRY(fork),
+	COMPAT_SYSCALL_ENTRY(fremovexattr),
+	COMPAT_SYSCALL_ENTRY(fsetxattr),
+	COMPAT_SYSCALL_ENTRY(fstat),
+	COMPAT_SYSCALL_ENTRY(fstatfs),
+	COMPAT_SYSCALL_ENTRY(fsync),
+	COMPAT_SYSCALL_ENTRY(ftruncate),
+	COMPAT_SYSCALL_ENTRY(futex),
+	COMPAT_SYSCALL_ENTRY(futimesat),
+	COMPAT_SYSCALL_ENTRY_ALT(getcpu, android_getcpu),
+	COMPAT_SYSCALL_ENTRY(getcwd),
+	COMPAT_SYSCALL_ENTRY(getdents),
+	COMPAT_SYSCALL_ENTRY(getdents64),
+	COMPAT_SYSCALL_ENTRY(getpgid),
+	COMPAT_SYSCALL_ENTRY(getpgrp),
+	COMPAT_SYSCALL_ENTRY(getpid),
+	COMPAT_SYSCALL_ENTRY(getppid),
+	COMPAT_SYSCALL_ENTRY_ALT(getpriority, android_getpriority),
+	COMPAT_SYSCALL_ENTRY(getrusage),
+	COMPAT_SYSCALL_ENTRY(getsid),
+	COMPAT_SYSCALL_ENTRY(gettid),
+	COMPAT_SYSCALL_ENTRY(gettimeofday),
+	COMPAT_SYSCALL_ENTRY(getxattr),
+	COMPAT_SYSCALL_ENTRY(inotify_add_watch),
+	COMPAT_SYSCALL_ENTRY(inotify_init),
+	COMPAT_SYSCALL_ENTRY(inotify_init1),
+	COMPAT_SYSCALL_ENTRY(inotify_rm_watch),
+	COMPAT_SYSCALL_ENTRY(ioctl),
+	COMPAT_SYSCALL_ENTRY(ioprio_set),
+	COMPAT_SYSCALL_ENTRY(kill),
+	COMPAT_SYSCALL_ENTRY(lgetxattr),
+	COMPAT_SYSCALL_ENTRY(link),
+	COMPAT_SYSCALL_ENTRY(linkat),
+	COMPAT_SYSCALL_ENTRY(listxattr),
+	COMPAT_SYSCALL_ENTRY(llistxattr),
+	COMPAT_SYSCALL_ENTRY(lremovexattr),
+	COMPAT_SYSCALL_ENTRY(lseek),
+	COMPAT_SYSCALL_ENTRY(lsetxattr),
+	COMPAT_SYSCALL_ENTRY(lstat),
+	COMPAT_SYSCALL_ENTRY(madvise),
+	COMPAT_SYSCALL_ENTRY(mincore),
+	COMPAT_SYSCALL_ENTRY(mkdir),
+	COMPAT_SYSCALL_ENTRY(mkdirat),
+	COMPAT_SYSCALL_ENTRY(mknod),
+	COMPAT_SYSCALL_ENTRY(mknodat),
+	COMPAT_SYSCALL_ENTRY(mlock),
+	COMPAT_SYSCALL_ENTRY(mlockall),
+	COMPAT_SYSCALL_ENTRY(munlock),
+	COMPAT_SYSCALL_ENTRY(munlockall),
+	COMPAT_SYSCALL_ENTRY(mount),
+	COMPAT_SYSCALL_ENTRY(mprotect),
+	COMPAT_SYSCALL_ENTRY(mremap),
+	COMPAT_SYSCALL_ENTRY(msync),
+	COMPAT_SYSCALL_ENTRY(munmap),
+	COMPAT_SYSCALL_ENTRY(name_to_handle_at),
+	COMPAT_SYSCALL_ENTRY(nanosleep),
+	COMPAT_SYSCALL_ENTRY(open),
+	COMPAT_SYSCALL_ENTRY(open_by_handle_at),
+	COMPAT_SYSCALL_ENTRY(openat),
+	COMPAT_SYSCALL_ENTRY_ALT(perf_event_open, android_perf_event_open),
+	COMPAT_SYSCALL_ENTRY(personality),
+	COMPAT_SYSCALL_ENTRY(pipe),
+	COMPAT_SYSCALL_ENTRY(pipe2),
+	COMPAT_SYSCALL_ENTRY(poll),
+	COMPAT_SYSCALL_ENTRY(ppoll),
+	COMPAT_SYSCALL_ENTRY_ALT(prctl, alt_sys_prctl),
+	COMPAT_SYSCALL_ENTRY(pread64),
+	COMPAT_SYSCALL_ENTRY(preadv),
+	COMPAT_SYSCALL_ENTRY(prlimit64),
+	COMPAT_SYSCALL_ENTRY(process_vm_readv),
+	COMPAT_SYSCALL_ENTRY(process_vm_writev),
+	COMPAT_SYSCALL_ENTRY(pselect6),
+	COMPAT_SYSCALL_ENTRY(ptrace),
+	COMPAT_SYSCALL_ENTRY(pwrite64),
+	COMPAT_SYSCALL_ENTRY(pwritev),
+	COMPAT_SYSCALL_ENTRY(read),
+	COMPAT_SYSCALL_ENTRY(readahead),
+	COMPAT_SYSCALL_ENTRY(readv),
+	COMPAT_SYSCALL_ENTRY(readlink),
+	COMPAT_SYSCALL_ENTRY(readlinkat),
+	COMPAT_SYSCALL_ENTRY(recvmmsg),
+	COMPAT_SYSCALL_ENTRY(remap_file_pages),
+	COMPAT_SYSCALL_ENTRY(removexattr),
+	COMPAT_SYSCALL_ENTRY(rename),
+	COMPAT_SYSCALL_ENTRY(renameat),
+	COMPAT_SYSCALL_ENTRY(restart_syscall),
+	COMPAT_SYSCALL_ENTRY(rmdir),
+	COMPAT_SYSCALL_ENTRY(rt_sigaction),
+	COMPAT_SYSCALL_ENTRY(rt_sigpending),
+	COMPAT_SYSCALL_ENTRY(rt_sigprocmask),
+	COMPAT_SYSCALL_ENTRY(rt_sigqueueinfo),
+	COMPAT_SYSCALL_ENTRY(rt_sigreturn),
+	COMPAT_SYSCALL_ENTRY(rt_sigsuspend),
+	COMPAT_SYSCALL_ENTRY(rt_sigtimedwait),
+	COMPAT_SYSCALL_ENTRY(rt_tgsigqueueinfo),
+	COMPAT_SYSCALL_ENTRY(sched_get_priority_max),
+	COMPAT_SYSCALL_ENTRY(sched_get_priority_min),
+	COMPAT_SYSCALL_ENTRY(sched_getaffinity),
+	COMPAT_SYSCALL_ENTRY(sched_getparam),
+	COMPAT_SYSCALL_ENTRY(sched_getscheduler),
+	COMPAT_SYSCALL_ENTRY(sched_setaffinity),
+	COMPAT_SYSCALL_ENTRY_ALT(sched_setscheduler,
+				 android_sched_setscheduler),
+	COMPAT_SYSCALL_ENTRY(sched_yield),
+	COMPAT_SYSCALL_ENTRY(seccomp),
+	COMPAT_SYSCALL_ENTRY(sendfile),
+	COMPAT_SYSCALL_ENTRY(sendfile64),
+	COMPAT_SYSCALL_ENTRY(sendmmsg),
+	COMPAT_SYSCALL_ENTRY(set_robust_list),
+	COMPAT_SYSCALL_ENTRY(set_tid_address),
+	COMPAT_SYSCALL_ENTRY(setitimer),
+	COMPAT_SYSCALL_ENTRY(setns),
+	COMPAT_SYSCALL_ENTRY(setpgid),
+	COMPAT_SYSCALL_ENTRY_ALT(setpriority, android_setpriority),
+	COMPAT_SYSCALL_ENTRY(setrlimit),
+	COMPAT_SYSCALL_ENTRY(setsid),
+	COMPAT_SYSCALL_ENTRY(settimeofday),
+	COMPAT_SYSCALL_ENTRY(setxattr),
+	COMPAT_SYSCALL_ENTRY(signalfd4),
+	COMPAT_SYSCALL_ENTRY(sigaltstack),
+	COMPAT_SYSCALL_ENTRY(splice),
+	COMPAT_SYSCALL_ENTRY(stat),
+	COMPAT_SYSCALL_ENTRY(statfs),
+	COMPAT_SYSCALL_ENTRY(symlink),
+	COMPAT_SYSCALL_ENTRY(symlinkat),
+	COMPAT_SYSCALL_ENTRY(sysinfo),
+	COMPAT_SYSCALL_ENTRY(syslog),
+	COMPAT_SYSCALL_ENTRY(tgkill),
+	COMPAT_SYSCALL_ENTRY(tee),
+	COMPAT_SYSCALL_ENTRY(tkill),
+	COMPAT_SYSCALL_ENTRY(time),
+	COMPAT_SYSCALL_ENTRY(timer_create),
+	COMPAT_SYSCALL_ENTRY(timer_delete),
+	COMPAT_SYSCALL_ENTRY(timer_gettime),
+	COMPAT_SYSCALL_ENTRY(timer_getoverrun),
+	COMPAT_SYSCALL_ENTRY(timer_settime),
+	COMPAT_SYSCALL_ENTRY(timerfd_create),
+	COMPAT_SYSCALL_ENTRY(timerfd_gettime),
+	COMPAT_SYSCALL_ENTRY(timerfd_settime),
+	COMPAT_SYSCALL_ENTRY(times),
+	COMPAT_SYSCALL_ENTRY(truncate),
+	COMPAT_SYSCALL_ENTRY(umask),
+	COMPAT_SYSCALL_ENTRY(umount2),
+	COMPAT_SYSCALL_ENTRY(uname),
+	COMPAT_SYSCALL_ENTRY(unlink),
+	COMPAT_SYSCALL_ENTRY(unlinkat),
+	COMPAT_SYSCALL_ENTRY(unshare),
+	COMPAT_SYSCALL_ENTRY(ustat),
+	COMPAT_SYSCALL_ENTRY(utimensat),
+	COMPAT_SYSCALL_ENTRY(utimes),
+	COMPAT_SYSCALL_ENTRY(vfork),
+	COMPAT_SYSCALL_ENTRY(vmsplice),
+	COMPAT_SYSCALL_ENTRY(wait4),
+	COMPAT_SYSCALL_ENTRY(waitid),
+	COMPAT_SYSCALL_ENTRY(write),
+	COMPAT_SYSCALL_ENTRY(writev),
+	COMPAT_SYSCALL_ENTRY(chown32),
+	COMPAT_SYSCALL_ENTRY(fchown32),
+	COMPAT_SYSCALL_ENTRY(fcntl64),
+	COMPAT_SYSCALL_ENTRY(fstat64),
+	COMPAT_SYSCALL_ENTRY(fstatat64),
+	COMPAT_SYSCALL_ENTRY(fstatfs64),
+	COMPAT_SYSCALL_ENTRY(ftruncate64),
+	COMPAT_SYSCALL_ENTRY(getegid),
+	COMPAT_SYSCALL_ENTRY(getegid32),
+	COMPAT_SYSCALL_ENTRY(geteuid),
+	COMPAT_SYSCALL_ENTRY(geteuid32),
+	COMPAT_SYSCALL_ENTRY(getgid),
+	COMPAT_SYSCALL_ENTRY(getgid32),
+	COMPAT_SYSCALL_ENTRY(getgroups32),
+	COMPAT_SYSCALL_ENTRY(getresgid32),
+	COMPAT_SYSCALL_ENTRY(getresuid32),
+	COMPAT_SYSCALL_ENTRY(getuid),
+	COMPAT_SYSCALL_ENTRY(getuid32),
+	COMPAT_SYSCALL_ENTRY(lchown32),
+	COMPAT_SYSCALL_ENTRY(lstat64),
+	COMPAT_SYSCALL_ENTRY(mmap2),
+	COMPAT_SYSCALL_ENTRY(_newselect),
+	COMPAT_SYSCALL_ENTRY(_llseek),
+	COMPAT_SYSCALL_ENTRY(sigaction),
+	COMPAT_SYSCALL_ENTRY(sigpending),
+	COMPAT_SYSCALL_ENTRY(sigprocmask),
+	COMPAT_SYSCALL_ENTRY(sigreturn),
+	COMPAT_SYSCALL_ENTRY(sigsuspend),
+	COMPAT_SYSCALL_ENTRY(setgid32),
+	COMPAT_SYSCALL_ENTRY(setgroups32),
+	COMPAT_SYSCALL_ENTRY(setregid32),
+	COMPAT_SYSCALL_ENTRY(setresgid32),
+	COMPAT_SYSCALL_ENTRY(setresuid32),
+	COMPAT_SYSCALL_ENTRY(setreuid32),
+	COMPAT_SYSCALL_ENTRY(setuid32),
+	COMPAT_SYSCALL_ENTRY(stat64),
+	COMPAT_SYSCALL_ENTRY(statfs64),
+	COMPAT_SYSCALL_ENTRY(truncate64),
+	COMPAT_SYSCALL_ENTRY(ugetrlimit),
+
+	/*
+	 * waitpid(2) is deprecated on most architectures, but still exists
+	 * on IA32.
+	 */
+#ifdef CONFIG_X86
+	COMPAT_SYSCALL_ENTRY(waitpid),
+#endif
+
+	/*
+	 * posix_fadvise(2) and sync_file_range(2) have ARM-specific wrappers
+	 * to deal with register alignment.
+	 */
+#ifdef CONFIG_ARM64
+	COMPAT_SYSCALL_ENTRY(arm_fadvise64_64),
+	COMPAT_SYSCALL_ENTRY(sync_file_range2),
+#else
+	COMPAT_SYSCALL_ENTRY(fadvise64_64),
+	COMPAT_SYSCALL_ENTRY(fadvise64),
+	COMPAT_SYSCALL_ENTRY(sync_file_range),
+#endif
+
+	/* IA32 uses the common socketcall(2) entrypoint for socket calls. */
+#ifdef CONFIG_X86
+	COMPAT_SYSCALL_ENTRY(socketcall),
+#else
+	COMPAT_SYSCALL_ENTRY(accept),
+	COMPAT_SYSCALL_ENTRY(accept4),
+	COMPAT_SYSCALL_ENTRY(bind),
+	COMPAT_SYSCALL_ENTRY(connect),
+	COMPAT_SYSCALL_ENTRY(getpeername),
+	COMPAT_SYSCALL_ENTRY(getsockname),
+	COMPAT_SYSCALL_ENTRY(getsockopt),
+	COMPAT_SYSCALL_ENTRY(listen),
+	COMPAT_SYSCALL_ENTRY(recvfrom),
+	COMPAT_SYSCALL_ENTRY(recvmsg),
+	COMPAT_SYSCALL_ENTRY(sendmsg),
+	COMPAT_SYSCALL_ENTRY(sendto),
+	COMPAT_SYSCALL_ENTRY(setsockopt),
+	COMPAT_SYSCALL_ENTRY(shutdown),
+	COMPAT_SYSCALL_ENTRY(socket),
+	COMPAT_SYSCALL_ENTRY(socketpair),
+	COMPAT_SYSCALL_ENTRY(recv),
+	COMPAT_SYSCALL_ENTRY(send),
+#endif
+
+	/*
+	 * getrlimit(2) is deprecated and not wired in the ARM compat table
+	 * on ARM64.
+	 */
+#ifndef CONFIG_ARM64
+	COMPAT_SYSCALL_ENTRY(getrlimit),
+#endif
+
+	/* x86-specific syscalls. */
+#ifdef CONFIG_X86
+	COMPAT_SYSCALL_ENTRY(modify_ldt),
+	COMPAT_SYSCALL_ENTRY(set_thread_area),
+#endif
+}; /* end android_compat_whitelist */
+#endif /* CONFIG_COMPAT */
+
+#endif /* ANDROID_WHITELISTS_H */
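A minimal sketch of the arithmetic behind the android_getpriority/android_setpriority comments above, assuming only what those comments state; the two helpers are illustrative and are not part of the patch:

/* getpriority(2) reports 20 - nice so the returned value stays positive. */
static inline int nice_to_getpriority(int nice)
{
	return 20 - nice;	/* nice -20 -> 40, nice 0 -> 20, nice 19 -> 1 */
}

/* android_setpriority allows nothing more favorable than nice -10. */
static inline int android_clamp_nice(int nice)
{
	return nice < -10 ? -10 : nice;
}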
diff --git a/security/chromiumos/complete_whitelists.h b/security/chromiumos/complete_whitelists.h
new file mode 100644
index 0000000..02a36cc
--- /dev/null
+++ b/security/chromiumos/complete_whitelists.h
@@ -0,0 +1,404 @@
+/*
+ * Linux Security Module for Chromium OS
+ *
+ * Copyright 2018 Google LLC. All Rights Reserved
+ *
+ * Authors:
+ *      Micah Morton <mortonm@chromium.org>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef COMPLETE_WHITELISTS_H
+#define COMPLETE_WHITELISTS_H
+
+/*
+ * NOTE: the purpose of this header is only to pull out the definition of this
+ * array from alt-syscall.c for the purposes of readability. It should not be
+ * included in other .c files.
+ */
+
+#include "alt-syscall.h"
+
+static struct syscall_whitelist_entry complete_whitelist[] = {
+	/* Syscalls wired up on ARM32/ARM64 and x86_64. */
+	SYSCALL_ENTRY(accept),
+	SYSCALL_ENTRY(accept4),
+	SYSCALL_ENTRY(acct),
+	SYSCALL_ENTRY(add_key),
+	SYSCALL_ENTRY(adjtimex),
+	SYSCALL_ENTRY(bind),
+	SYSCALL_ENTRY(brk),
+	SYSCALL_ENTRY(capget),
+	SYSCALL_ENTRY(capset),
+	SYSCALL_ENTRY(chdir),
+	SYSCALL_ENTRY(chroot),
+	SYSCALL_ENTRY(clock_adjtime),
+	SYSCALL_ENTRY(clock_getres),
+	SYSCALL_ENTRY(clock_gettime),
+	SYSCALL_ENTRY(clock_nanosleep),
+	SYSCALL_ENTRY(clock_settime),
+	SYSCALL_ENTRY(clone),
+	SYSCALL_ENTRY(close),
+	SYSCALL_ENTRY(connect),
+	SYSCALL_ENTRY(delete_module),
+	SYSCALL_ENTRY(dup),
+	SYSCALL_ENTRY(dup3),
+	SYSCALL_ENTRY(epoll_create1),
+	SYSCALL_ENTRY(epoll_ctl),
+	SYSCALL_ENTRY(epoll_pwait),
+	SYSCALL_ENTRY(eventfd2),
+	SYSCALL_ENTRY(execve),
+	SYSCALL_ENTRY(exit),
+	SYSCALL_ENTRY(exit_group),
+	SYSCALL_ENTRY(faccessat),
+	SYSCALL_ENTRY(fallocate),
+	SYSCALL_ENTRY(fanotify_init),
+	SYSCALL_ENTRY(fanotify_mark),
+	SYSCALL_ENTRY(fchdir),
+	SYSCALL_ENTRY(fchmod),
+	SYSCALL_ENTRY(fchmodat),
+	SYSCALL_ENTRY(fchown),
+	SYSCALL_ENTRY(fchownat),
+	SYSCALL_ENTRY(fcntl),
+	SYSCALL_ENTRY(fdatasync),
+	SYSCALL_ENTRY(fgetxattr),
+	SYSCALL_ENTRY(finit_module),
+	SYSCALL_ENTRY(flistxattr),
+	SYSCALL_ENTRY(flock),
+	SYSCALL_ENTRY(fremovexattr),
+	SYSCALL_ENTRY(fsetxattr),
+	SYSCALL_ENTRY(fstatfs),
+	SYSCALL_ENTRY(fsync),
+	SYSCALL_ENTRY(ftruncate),
+	SYSCALL_ENTRY(futex),
+	SYSCALL_ENTRY(getcpu),
+	SYSCALL_ENTRY(getcwd),
+	SYSCALL_ENTRY(getdents64),
+	SYSCALL_ENTRY(getegid),
+	SYSCALL_ENTRY(geteuid),
+	SYSCALL_ENTRY(getgid),
+	SYSCALL_ENTRY(getgroups),
+	SYSCALL_ENTRY(getitimer),
+	SYSCALL_ENTRY(get_mempolicy),
+	SYSCALL_ENTRY(getpeername),
+	SYSCALL_ENTRY(getpgid),
+	SYSCALL_ENTRY(getpid),
+	SYSCALL_ENTRY(getppid),
+	SYSCALL_ENTRY(getpriority),
+	SYSCALL_ENTRY(getrandom),
+	SYSCALL_ENTRY(getresgid),
+	SYSCALL_ENTRY(getresuid),
+	SYSCALL_ENTRY(getrlimit),
+	SYSCALL_ENTRY(get_robust_list),
+	SYSCALL_ENTRY(getrusage),
+	SYSCALL_ENTRY(getsid),
+	SYSCALL_ENTRY(getsockname),
+	SYSCALL_ENTRY(getsockopt),
+	SYSCALL_ENTRY(gettid),
+	SYSCALL_ENTRY(gettimeofday),
+	SYSCALL_ENTRY(getuid),
+	SYSCALL_ENTRY(getxattr),
+	SYSCALL_ENTRY(init_module),
+	SYSCALL_ENTRY(inotify_add_watch),
+	SYSCALL_ENTRY(inotify_init1),
+	SYSCALL_ENTRY(inotify_rm_watch),
+	SYSCALL_ENTRY(io_cancel),
+	SYSCALL_ENTRY(ioctl),
+	SYSCALL_ENTRY(io_destroy),
+	SYSCALL_ENTRY(io_getevents),
+	SYSCALL_ENTRY(ioprio_get),
+	SYSCALL_ENTRY(ioprio_set),
+	SYSCALL_ENTRY(io_setup),
+	SYSCALL_ENTRY(io_submit),
+	SYSCALL_ENTRY(kcmp),
+	SYSCALL_ENTRY(kexec_load),
+	SYSCALL_ENTRY(keyctl),
+	SYSCALL_ENTRY(kill),
+	SYSCALL_ENTRY(lgetxattr),
+	SYSCALL_ENTRY(linkat),
+	SYSCALL_ENTRY(listen),
+	SYSCALL_ENTRY(listxattr),
+	SYSCALL_ENTRY(llistxattr),
+	SYSCALL_ENTRY(lookup_dcookie),
+	SYSCALL_ENTRY(lremovexattr),
+	SYSCALL_ENTRY(lseek),
+	SYSCALL_ENTRY(lsetxattr),
+	SYSCALL_ENTRY(madvise),
+	SYSCALL_ENTRY(mbind),
+	SYSCALL_ENTRY(memfd_create),
+	SYSCALL_ENTRY(mincore),
+	SYSCALL_ENTRY(mkdirat),
+	SYSCALL_ENTRY(mknodat),
+	SYSCALL_ENTRY(mlock),
+	SYSCALL_ENTRY(mlockall),
+	SYSCALL_ENTRY(mount),
+	SYSCALL_ENTRY(move_pages),
+	SYSCALL_ENTRY(mprotect),
+	SYSCALL_ENTRY(mq_getsetattr),
+	SYSCALL_ENTRY(mq_notify),
+	SYSCALL_ENTRY(mq_open),
+	SYSCALL_ENTRY(mq_timedreceive),
+	SYSCALL_ENTRY(mq_timedsend),
+	SYSCALL_ENTRY(mq_unlink),
+	SYSCALL_ENTRY(mremap),
+	SYSCALL_ENTRY(msgctl),
+	SYSCALL_ENTRY(msgget),
+	SYSCALL_ENTRY(msgrcv),
+	SYSCALL_ENTRY(msgsnd),
+	SYSCALL_ENTRY(msync),
+	SYSCALL_ENTRY(munlock),
+	SYSCALL_ENTRY(munlockall),
+	SYSCALL_ENTRY(munmap),
+	SYSCALL_ENTRY(name_to_handle_at),
+	SYSCALL_ENTRY(nanosleep),
+	SYSCALL_ENTRY(openat),
+	SYSCALL_ENTRY(open_by_handle_at),
+	SYSCALL_ENTRY(perf_event_open),
+	SYSCALL_ENTRY(personality),
+	SYSCALL_ENTRY(pipe2),
+	SYSCALL_ENTRY(pivot_root),
+	SYSCALL_ENTRY(ppoll),
+	SYSCALL_ENTRY_ALT(prctl, alt_sys_prctl),
+	SYSCALL_ENTRY(pread64),
+	SYSCALL_ENTRY(preadv),
+	SYSCALL_ENTRY(prlimit64),
+	SYSCALL_ENTRY(process_vm_readv),
+	SYSCALL_ENTRY(process_vm_writev),
+	SYSCALL_ENTRY(pselect6),
+	SYSCALL_ENTRY(ptrace),
+	SYSCALL_ENTRY(pwrite64),
+	SYSCALL_ENTRY(pwritev),
+	SYSCALL_ENTRY(quotactl),
+	SYSCALL_ENTRY(read),
+	SYSCALL_ENTRY(readahead),
+	SYSCALL_ENTRY(readlinkat),
+	SYSCALL_ENTRY(readv),
+	SYSCALL_ENTRY(reboot),
+	SYSCALL_ENTRY(recvfrom),
+	SYSCALL_ENTRY(recvmmsg),
+	SYSCALL_ENTRY(recvmsg),
+	SYSCALL_ENTRY(remap_file_pages),
+	SYSCALL_ENTRY(removexattr),
+	SYSCALL_ENTRY(renameat),
+	SYSCALL_ENTRY(request_key),
+	SYSCALL_ENTRY(restart_syscall),
+	SYSCALL_ENTRY(rt_sigaction),
+	SYSCALL_ENTRY(rt_sigpending),
+	SYSCALL_ENTRY(rt_sigprocmask),
+	SYSCALL_ENTRY(rt_sigqueueinfo),
+	SYSCALL_ENTRY(rt_sigsuspend),
+	SYSCALL_ENTRY(rt_sigtimedwait),
+	SYSCALL_ENTRY(rt_tgsigqueueinfo),
+	SYSCALL_ENTRY(sched_getaffinity),
+	SYSCALL_ENTRY(sched_getattr),
+	SYSCALL_ENTRY(sched_getparam),
+	SYSCALL_ENTRY(sched_get_priority_max),
+	SYSCALL_ENTRY(sched_get_priority_min),
+	SYSCALL_ENTRY(sched_getscheduler),
+	SYSCALL_ENTRY(sched_rr_get_interval),
+	SYSCALL_ENTRY(sched_setaffinity),
+	SYSCALL_ENTRY(sched_setattr),
+	SYSCALL_ENTRY(sched_setparam),
+	SYSCALL_ENTRY(sched_setscheduler),
+	SYSCALL_ENTRY(sched_yield),
+	SYSCALL_ENTRY(seccomp),
+	SYSCALL_ENTRY(semctl),
+	SYSCALL_ENTRY(semget),
+	SYSCALL_ENTRY(semop),
+	SYSCALL_ENTRY(semtimedop),
+	SYSCALL_ENTRY(sendfile),
+	SYSCALL_ENTRY(sendmmsg),
+	SYSCALL_ENTRY(sendmsg),
+	SYSCALL_ENTRY(sendto),
+	SYSCALL_ENTRY(setdomainname),
+	SYSCALL_ENTRY(setfsgid),
+	SYSCALL_ENTRY(setfsuid),
+	SYSCALL_ENTRY(setgid),
+	SYSCALL_ENTRY(setgroups),
+	SYSCALL_ENTRY(sethostname),
+	SYSCALL_ENTRY(setitimer),
+	SYSCALL_ENTRY(set_mempolicy),
+	SYSCALL_ENTRY(setns),
+	SYSCALL_ENTRY(setpgid),
+	SYSCALL_ENTRY(setpriority),
+	SYSCALL_ENTRY(setregid),
+	SYSCALL_ENTRY(setresgid),
+	SYSCALL_ENTRY(setresuid),
+	SYSCALL_ENTRY(setreuid),
+	SYSCALL_ENTRY(setrlimit),
+	SYSCALL_ENTRY(set_robust_list),
+	SYSCALL_ENTRY(setsid),
+	SYSCALL_ENTRY(setsockopt),
+	SYSCALL_ENTRY(set_tid_address),
+	SYSCALL_ENTRY(settimeofday),
+	SYSCALL_ENTRY(setuid),
+	SYSCALL_ENTRY(setxattr),
+	SYSCALL_ENTRY(shmat),
+	SYSCALL_ENTRY(shmctl),
+	SYSCALL_ENTRY(shmdt),
+	SYSCALL_ENTRY(shmget),
+	SYSCALL_ENTRY(shutdown),
+	SYSCALL_ENTRY(sigaltstack),
+	SYSCALL_ENTRY(signalfd4),
+	SYSCALL_ENTRY(socket),
+	SYSCALL_ENTRY(socketpair),
+	SYSCALL_ENTRY(splice),
+	SYSCALL_ENTRY(statfs),
+	SYSCALL_ENTRY(swapoff),
+	SYSCALL_ENTRY(swapon),
+	SYSCALL_ENTRY(symlinkat),
+	SYSCALL_ENTRY(sync),
+	SYSCALL_ENTRY(syncfs),
+	SYSCALL_ENTRY(sysinfo),
+	SYSCALL_ENTRY(syslog),
+	SYSCALL_ENTRY(tee),
+	SYSCALL_ENTRY(tgkill),
+	SYSCALL_ENTRY(timer_create),
+	SYSCALL_ENTRY(timer_delete),
+	SYSCALL_ENTRY(timerfd_create),
+	SYSCALL_ENTRY(timerfd_gettime),
+	SYSCALL_ENTRY(timerfd_settime),
+	SYSCALL_ENTRY(timer_getoverrun),
+	SYSCALL_ENTRY(timer_gettime),
+	SYSCALL_ENTRY(timer_settime),
+	SYSCALL_ENTRY(times),
+	SYSCALL_ENTRY(tkill),
+	SYSCALL_ENTRY(truncate),
+	SYSCALL_ENTRY(umask),
+	SYSCALL_ENTRY(unlinkat),
+	SYSCALL_ENTRY(unshare),
+	SYSCALL_ENTRY(utimensat),
+	SYSCALL_ENTRY(vhangup),
+	SYSCALL_ENTRY(vmsplice),
+	SYSCALL_ENTRY(wait4),
+	SYSCALL_ENTRY(waitid),
+	SYSCALL_ENTRY(write),
+	SYSCALL_ENTRY(writev),
+
+	/* Exist for x86_64 and ARM32 but not ARM64. */
+#ifndef CONFIG_ARM64
+	SYSCALL_ENTRY(access),
+	SYSCALL_ENTRY(alarm),
+	SYSCALL_ENTRY(chmod),
+	SYSCALL_ENTRY(chown),
+	SYSCALL_ENTRY(creat),
+	SYSCALL_ENTRY(dup2),
+	SYSCALL_ENTRY(epoll_create),
+	SYSCALL_ENTRY(epoll_wait),
+	SYSCALL_ENTRY(eventfd),
+	SYSCALL_ENTRY(fork),
+	SYSCALL_ENTRY(futimesat),
+	SYSCALL_ENTRY(getdents),
+	SYSCALL_ENTRY(getpgrp),
+	SYSCALL_ENTRY(inotify_init),
+	SYSCALL_ENTRY(lchown),
+	SYSCALL_ENTRY(link),
+	SYSCALL_ENTRY(mkdir),
+	SYSCALL_ENTRY(mknod),
+	SYSCALL_ENTRY(open),
+	SYSCALL_ENTRY(pause),
+	SYSCALL_ENTRY(pipe),
+	SYSCALL_ENTRY(poll),
+	SYSCALL_ENTRY(readlink),
+	SYSCALL_ENTRY(rename),
+	SYSCALL_ENTRY(rmdir),
+	SYSCALL_ENTRY(select),
+	SYSCALL_ENTRY(signalfd),
+	SYSCALL_ENTRY(symlink),
+	SYSCALL_ENTRY(sysfs),
+	SYSCALL_ENTRY(time),
+	SYSCALL_ENTRY(unlink),
+	SYSCALL_ENTRY(ustat),
+	SYSCALL_ENTRY(utime),
+	SYSCALL_ENTRY(utimes),
+	SYSCALL_ENTRY(vfork),
+#endif
+
+	/* Exists for ARM32 and ARM64 but not x86_64. */
+#ifndef CONFIG_X86_64
+	SYSCALL_ENTRY(sync_file_range2),
+#endif
+
+	/* Exist for x86_64 and ARM64 but not ARM32. */
+#if !defined(CONFIG_ARM) && (defined(CONFIG_ARM64) || defined(CONFIG_X86_64))
+	SYSCALL_ENTRY(fadvise64),
+	SYSCALL_ENTRY(fstat),
+	SYSCALL_ENTRY(migrate_pages),
+	SYSCALL_ENTRY(mmap),
+	SYSCALL_ENTRY(rt_sigreturn),
+	SYSCALL_ENTRY(sync_file_range),
+	SYSCALL_ENTRY(umount2),
+	SYSCALL_ENTRY(uname),
+#endif
+
+	/* Unique to ARM32. */
+#if defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
+	SYSCALL_ENTRY(arm_fadvise64_64),
+	SYSCALL_ENTRY(bdflush),
+	SYSCALL_ENTRY(fcntl64),
+	SYSCALL_ENTRY(fstat64),
+	SYSCALL_ENTRY(fstatat64),
+	SYSCALL_ENTRY(ftruncate64),
+	SYSCALL_ENTRY(ipc),
+	SYSCALL_ENTRY(lstat64),
+	SYSCALL_ENTRY(mmap2),
+	SYSCALL_ENTRY(nice),
+	SYSCALL_ENTRY(pciconfig_iobase),
+	SYSCALL_ENTRY(pciconfig_read),
+	SYSCALL_ENTRY(pciconfig_write),
+	SYSCALL_ENTRY(recv),
+	SYSCALL_ENTRY(send),
+	SYSCALL_ENTRY(sendfile64),
+	SYSCALL_ENTRY(sigaction),
+	SYSCALL_ENTRY(sigpending),
+	SYSCALL_ENTRY(sigprocmask),
+	SYSCALL_ENTRY(sigsuspend),
+	SYSCALL_ENTRY(socketcall),
+	SYSCALL_ENTRY(stat64),
+	SYSCALL_ENTRY(stime),
+	SYSCALL_ENTRY(syscall),
+	SYSCALL_ENTRY(truncate64),
+	SYSCALL_ENTRY(umount),
+	SYSCALL_ENTRY(uselib),
+#endif
+
+	/* Unique to x86_64. */
+#ifdef CONFIG_X86_64
+	SYSCALL_ENTRY(arch_prctl),
+	SYSCALL_ENTRY(ioperm),
+	SYSCALL_ENTRY(iopl),
+	SYSCALL_ENTRY(lstat),
+	SYSCALL_ENTRY(modify_ldt),
+	SYSCALL_ENTRY(newfstatat),
+	SYSCALL_ENTRY(stat),
+	SYSCALL_ENTRY(_sysctl),
+#endif
+
+	/* Unique to ARM64. */
+#if defined(CONFIG_ARM64) && !defined(CONFIG_ARM)
+	SYSCALL_ENTRY(fstatat),
+	SYSCALL_ENTRY(nfsservctl),
+	SYSCALL_ENTRY(renameat2),
+#endif
+}; /* end complete_whitelist */
+
+#ifdef CONFIG_COMPAT
+/*
+ * For now we do not provide a 32-bit-compatible version of the complete
+ * whitelist. Since no compat syscalls are whitelisted here, a call into the
+ * compat section of this "complete" alt syscall table will be redirected to
+ * block_syscall() (or, if the permissive variant is used, to
+ * warn_compat_syscall()).
+ */
+static struct syscall_whitelist_entry complete_compat_whitelist[] = {};
+#endif /* CONFIG_COMPAT */
+
+#endif /* COMPLETE_WHITELISTS_H */
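Because complete_compat_whitelist is empty, SYSCALL_WHITELIST_COMPAT(complete) sets nr_compat_whitelist to ARRAY_SIZE(complete_compat_whitelist), i.e. 0. A rough sketch of why that gives the behavior described in the comment, assuming the compat lookup in alt-syscall.c has the general shape below (the real function is not shown in this patch):

static bool compat_nr_is_whitelisted(const struct syscall_whitelist *wl,
				     unsigned int nr)
{
	unsigned int i;

	/* With nr_compat_whitelist == 0 this loop matches nothing, ... */
	for (i = 0; i < wl->nr_compat_whitelist; i++)
		if (wl->compat_whitelist[i].nr == nr)
			return true;

	/*
	 * ... so every compat syscall falls through to block_syscall(), or to
	 * warn_compat_syscall() for the permissive variant of the table.
	 */
	return false;
}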
diff --git a/security/chromiumos/lsm.c b/security/chromiumos/lsm.c
index 64384da..bd0a200 100644
--- a/security/chromiumos/lsm.c
+++ b/security/chromiumos/lsm.c
@@ -19,17 +19,62 @@
 
 #define pr_fmt(fmt) "Chromium OS LSM: " fmt
 
-#include <linux/module.h>
-#include <linux/security.h>
-#include <linux/sched.h>	/* current and other task related stuff */
+#include <asm/syscall.h>
 #include <linux/fs.h>
 #include <linux/fs_struct.h>
+#include <linux/hashtable.h>
+#include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/path.h>
+#include <linux/sched.h>	/* current and other task related stuff */
+#include <linux/security.h>
 
 #include "inode_mark.h"
+#include "process_management.h"
 #include "utils.h"
 
+#define NUM_BITS 8 // 256 buckets in hash table
+
+static DEFINE_HASHTABLE(process_setuid_policy_hashtable, NUM_BITS);
+
+/*
+ * Bool signifying whether to disable fixups for process-management-related
+ * routines in the kernel (setuid, setgid, kill). Defaults to false; it can be
+ * overridden with the 'disable_process_management_policies' kernel parameter.
+ * Static variables are zero-initialized (i.e. false) since they live in BSS.
+ */
+static bool disable_process_management_policies;
+
+/* Disable process management policies if flag passed */
+static int set_disable_process_management_policies(char *str)
+{
+	disable_process_management_policies = true;
+	return 1;
+}
+__setup("disable_process_management_policies=",
+	set_disable_process_management_policies);
+
+/*
+ * Hash table entry to store process management policy signifying that 'parent'
+ * user can use 'child' user for process management (for now that just means
+ * 'parent' can set*uid() to 'child'). Will be adding exceptions for set*gid()
+ * and kill() in the future.
+ */
+struct entry {
+	struct hlist_node next;
+	struct hlist_node dlist; /* for deletion cleanup */
+	uint64_t parent_kuid;
+	uint64_t child_kuid;
+};
+
+static DEFINE_HASHTABLE(sb_nosymfollow_hashtable, NUM_BITS);
+
+struct sb_entry {
+	struct hlist_node next;
+	struct hlist_node dlist; /* for deletion cleanup */
+	uintptr_t sb;
+};
+
 static void report(const char *origin, struct path *path, char *operation)
 {
 	char *alloced = NULL, *cmdline;
@@ -76,20 +121,32 @@
 	}
 #endif
 
-	if (!(flags & (MS_BIND | MS_MOVE | MS_SHARED | MS_PRIVATE | MS_SLAVE |
-		       MS_UNBINDABLE)) &&
+	if ((!(flags & (MS_BIND | MS_MOVE | MS_SHARED | MS_PRIVATE | MS_SLAVE |
+			MS_UNBINDABLE)) ||
+	     ((flags & MS_REMOUNT) && (flags & MS_BIND))) &&
 	    !capable(CAP_SYS_ADMIN)) {
+		int required_mnt_flags = MNT_NOEXEC | MNT_NOSUID | MNT_NODEV;
+
+		if (flags & MS_REMOUNT) {
+			/*
+			 * If this is a remount, we only require that the
+			 * requested flags are a superset of the original mount
+			 * flags.
+			 */
+			required_mnt_flags &= path->mnt->mnt_flags;
+		}
 		/*
 		 * The three flags we are interested in disallowing in
 		 * unprivileged user namespaces (MS_NOEXEC, MS_NOSUID, MS_NODEV)
-		 * cannot be modified when doing a remount/bind. The kernel
+		 * cannot be modified when doing a bind-mount. The kernel
 		 * attempts to dispatch calls to do_mount() within
 		 * fs/namespace.c in the following order:
 		 *
 		 * * If the MS_REMOUNT flag is present, it calls do_remount().
-		 *   When MS_BIND is also present, it only allows to set/unset
-		 *   MS_RDONLY. Otherwise it bails in the absence of the
-		 *   CAP_SYS_ADMIN in the init ns.
+		 *   When MS_BIND is also present, it only allows to modify the
+		 *   per-mount flags, which are copied into
+		 *   |required_mnt_flags|.  Otherwise it bails in the absence of
+		 *   the CAP_SYS_ADMIN in the init ns.
 		 * * If the MS_BIND flag is present, the only other flag checked
 		 *   is MS_REC.
 		 * * If any of the mount propagation flags are present
@@ -98,21 +155,22 @@
 		 *   flags.
 		 * * If MS_MOVE flag is present, all other flags are ignored.
 		 */
-		if (!(flags & MS_NOEXEC)) {
+		if ((required_mnt_flags & MNT_NOEXEC) && !(flags & MS_NOEXEC)) {
 			report("sb_mount", path,
 			       "Mounting a filesystem with 'exec' flag requires CAP_SYS_ADMIN in init ns");
 			pr_notice("sb_mount dev=%s type=%s flags=%#lx\n",
 				  dev_name, type, flags);
 			return -EPERM;
 		}
-		if (!(flags & MS_NOSUID)) {
+		if ((required_mnt_flags & MNT_NOSUID) && !(flags & MS_NOSUID)) {
 			report("sb_mount", path,
 			       "Mounting a filesystem with 'suid' flag requires CAP_SYS_ADMIN in init ns");
 			pr_notice("sb_mount dev=%s type=%s flags=%#lx\n",
 				  dev_name, type, flags);
 			return -EPERM;
 		}
-		if (!(flags & MS_NODEV) && strcmp(type, "devpts")) {
+		if ((required_mnt_flags & MNT_NODEV) && !(flags & MS_NODEV) &&
+		    strcmp(type, "devpts")) {
 			report("sb_mount", path,
 			       "Mounting a filesystem with 'dev' flag requires CAP_SYS_ADMIN in init ns");
 			pr_notice("sb_mount dev=%s type=%s flags=%#lx\n",
@@ -127,6 +185,8 @@
 static int module_locking = 1;
 static struct vfsmount *locked_root;
 static DEFINE_SPINLOCK(locked_root_spinlock);
+static DEFINE_SPINLOCK(process_setuid_policy_hashtable_spinlock);
+static DEFINE_SPINLOCK(sb_nosymfollow_hashtable_spinlock);
 
 #ifdef CONFIG_SYSCTL
 static int zero;
@@ -191,6 +251,106 @@
 static void check_locking_enforcement(void) { }
 #endif
 
+/* Check for entry in hash table. */
+static bool chromiumos_check_sb_nosymfollow_hashtable(struct super_block *sb)
+{
+	struct sb_entry *entry;
+	uintptr_t sb_pointer = (uintptr_t)sb;
+	bool found = false;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(sb_nosymfollow_hashtable,
+				   entry, next, sb_pointer) {
+		if (entry->sb == sb_pointer) {
+			found = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	/*
+	 * It's possible that a policy gets added in between the time we check
+	 * above and when we return false here. Such a race condition should
+	 * not, however, affect this check, since it would only be relevant if
+	 * userspace tried to traverse a symlink on a filesystem before that
+	 * filesystem was done being mounted (or potentially while it was being
+	 * remounted with new mount flags).
+	 */
+	return found;
+}
+
+/* Add entry to hash table. */
+static int chromiumos_add_sb_nosymfollow_hashtable(struct super_block *sb)
+{
+	struct sb_entry *new;
+	uintptr_t sb_pointer = (uintptr_t)sb;
+
+	/* Return if entry already exists */
+	if (chromiumos_check_sb_nosymfollow_hashtable(sb))
+		return 0;
+
+	new = kzalloc(sizeof(struct sb_entry), GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+	new->sb = sb_pointer;
+	spin_lock(&sb_nosymfollow_hashtable_spinlock);
+	hash_add_rcu(sb_nosymfollow_hashtable, &new->next, sb_pointer);
+	spin_unlock(&sb_nosymfollow_hashtable_spinlock);
+	return 0;
+}
+
+/* Flush all entries from hash table. */
+void chromiumos_flush_sb_nosymfollow_hashtable(void)
+{
+	struct sb_entry *entry;
+	struct hlist_node *hlist_node;
+	unsigned int bkt_loop_cursor;
+	HLIST_HEAD(free_list);
+
+	/*
+	 * Could probably use hash_for_each_rcu here instead, but this should
+	 * be fine as well.
+	 */
+	spin_lock(&sb_nosymfollow_hashtable_spinlock);
+	hash_for_each_safe(sb_nosymfollow_hashtable, bkt_loop_cursor,
+			   hlist_node, entry, next) {
+		hash_del_rcu(&entry->next);
+		hlist_add_head(&entry->dlist, &free_list);
+	}
+	spin_unlock(&sb_nosymfollow_hashtable_spinlock);
+	synchronize_rcu();
+	hlist_for_each_entry_safe(entry, hlist_node, &free_list, dlist)
+		kfree(entry);
+}
+
+/* Remove entry from hash table. */
+static void chromiumos_remove_sb_nosymfollow_hashtable(struct super_block *sb)
+{
+	struct sb_entry *entry;
+	struct hlist_node *hlist_node;
+	uintptr_t sb_pointer = (uintptr_t)sb;
+	bool free_entry = false;
+
+	/*
+	 * Could probably use hash_for_each_rcu here instead, but this should
+	 * be fine as well.
+	 */
+	spin_lock(&sb_nosymfollow_hashtable_spinlock);
+	hash_for_each_possible_safe(sb_nosymfollow_hashtable, entry,
+			   hlist_node, next, sb_pointer) {
+		if (entry->sb == sb_pointer) {
+			hash_del_rcu(&entry->next);
+			free_entry = true;
+			break;
+		}
+	}
+	spin_unlock(&sb_nosymfollow_hashtable_spinlock);
+	if (free_entry) {
+		synchronize_rcu();
+		kfree(entry);
+	}
+}
+
 int chromiumos_security_sb_umount(struct vfsmount *mnt, int flags)
 {
 	/*
@@ -204,6 +364,9 @@
 		pr_info("umount pinned fs: refusing further module loads\n");
 	}
 
+	/* If mnt->mnt_sb is in nosymfollow hashtable, remove it. */
+	chromiumos_remove_sb_nosymfollow_hashtable(mnt->mnt_sb);
+
 	return 0;
 }
 
@@ -267,21 +430,32 @@
 	return check_pinning("request_firmware", file);
 }
 
+/*
+ * NOTE: The WARN() calls will emit a warning in cases of blocked symlink
+ * traversal attempts. These will show up in kernel warning reports
+ * collected by the crash reporter, so we have some insight on spurious
+ * failures that need addressing.
+ */
 int chromiumos_security_inode_follow_link(struct dentry *dentry,
 					  struct nameidata *nd)
 {
 	static char accessed_path[PATH_MAX];
 	enum chromiumos_inode_security_policy policy;
 
+	/* Deny if symlinks have been disabled on this superblock. */
+	if (chromiumos_check_sb_nosymfollow_hashtable(dentry->d_sb)) {
+		WARN(1,
+		     "Blocked symlink traversal for path %x:%x:%s (symlinks were disabled on this FS through the 'nosymfollow' mount option)\n",
+		     MAJOR(dentry->d_sb->s_dev),
+		     MINOR(dentry->d_sb->s_dev),
+		     dentry_path(dentry, accessed_path, PATH_MAX));
+		return -EACCES;
+	}
+
 	policy = chromiumos_get_inode_security_policy(
 		dentry,
 		CHROMIUMOS_SYMLINK_TRAVERSAL);
-	/*
-	 * Emit a warning in cases of blocked symlink traversal attempts. These
-	 * will show up in kernel warning reports collected by the crash
-	 * reporter, so we have some insight on spurious failures that need
-	 * addressing.
-	 */
+
 	WARN(policy == CHROMIUMOS_INODE_POLICY_BLOCK,
 	     "Blocked symlink traversal for path %x:%x:%s (see https://goo.gl/8xICW6 for context and rationale)\n",
 	     MAJOR(dentry->d_sb->s_dev), MINOR(dentry->d_sb->s_dev),
@@ -319,6 +493,377 @@
 	return policy == CHROMIUMOS_INODE_POLICY_BLOCK ? -EACCES : 0;
 }
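As a quick userspace illustration of the superblock check added above (the mount point and file names are placeholders, and the filesystem is assumed to have been mounted with the "nosymfollow" option), traversal through a symlink fails while direct access still works:

/* Illustration only: mount point and file names are placeholders. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Assumes /mnt/test was mounted with the "nosymfollow" option. */
	symlink("/mnt/test/target", "/mnt/test/link");

	if (open("/mnt/test/link", O_RDONLY) < 0)
		perror("open via symlink");	/* EACCES, traversal blocked */

	/* Opening the target directly is unaffected. */
	if (open("/mnt/test/target", O_RDONLY) < 0)
		perror("open target");
	return 0;
}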
 
+bool chromiumos_check_setuid_policy_hashtable_key(kuid_t parent)
+{
+	struct entry *entry;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(process_setuid_policy_hashtable,
+				   entry, next, __kuid_val(parent)) {
+		if (entry->parent_kuid == __kuid_val(parent)) {
+			rcu_read_unlock();
+			return true;
+		}
+	}
+	rcu_read_unlock();
+
+	/*
+	 * Using RCU, it's possible that a policy gets added in between the time
+	 * we check above and when we return false here. This is fine, since
+	 * policy updates only happen during system startup, well before
+	 * sandboxed system services start running and the policies need to be
+	 * queried.
+	 */
+	return false;
+}
+
+bool chromiumos_check_setuid_policy_hashtable_key_value(kuid_t parent,
+							kuid_t child)
+{
+	struct entry *entry;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(process_setuid_policy_hashtable,
+				   entry, next, __kuid_val(parent)) {
+		if (entry->parent_kuid == __kuid_val(parent) &&
+		    entry->child_kuid == __kuid_val(child)) {
+			rcu_read_unlock();
+			return true;
+		}
+	}
+	rcu_read_unlock();
+
+	/*
+	 * Using RCU, it's possible that a policy gets added in between the time
+	 * we check above and when we return false here. This is fine, since
+	 * policy updates only happen during system startup, well before
+	 * sandboxed system services start running and the policies need to be
+	 * queried.
+	 */
+	return false;
+}
+
+bool setuid_syscall(int num)
+{
+#ifdef CONFIG_X86_64
+	if (!(num == __NR_setreuid ||
+	      num == __NR_setuid ||
+	      num == __NR_setresuid ||
+	      num == __NR_setfsuid))
+		return false;
+#elif defined CONFIG_ARM64
+	if (!(num == __NR_setuid ||
+	      num == __NR_setreuid ||
+	      num == __NR_setfsuid ||
+	      num == __NR_setresuid ||
+	      num == __NR_compat_setuid ||
+	      num == __NR_compat_setreuid ||
+	      num == __NR_compat_setfsuid ||
+	      num == __NR_compat_setresuid ||
+	      num == __NR_compat_setreuid32 ||
+	      num == __NR_compat_setresuid32 ||
+	      num == __NR_compat_setuid32 ||
+	      num == __NR_compat_setfsuid32))
+		return false;
+#else /* CONFIG_ARM */
+	if (!(num == __NR_setreuid32 ||
+	      num == __NR_setuid32 ||
+	      num == __NR_setresuid32 ||
+	      num == __NR_setfsuid32))
+		return false;
+#endif
+	return true;
+}
+
+int chromiumos_security_capable(const struct cred *cred,
+				struct user_namespace *ns,
+				int cap)
+{
+	/* The current->mm check will fail if this is a kernel thread. */
+	if (!disable_process_management_policies &&
+	    cap == CAP_SETUID &&
+	    current->mm &&
+	    chromiumos_check_setuid_policy_hashtable_key(cred->uid)) {
+		/*
+		 * syscall_get_nr can theoretically return 0 or -1, but that
+		 * would signify that the syscall is being aborted due to a
+		 * signal, so we don't need to check for this case here.
+		 */
+		if (!(setuid_syscall(syscall_get_nr(current,
+						    current_pt_regs())))) {
+			/*
+			 * Deny if we're not in a set*uid() syscall to avoid
+			 * giving powers gated by CAP_SETUID that are related
+			 * to functionality other than calling set*uid() (e.g.
+			 * allowing user to set up userns uid mappings).
+			 */
+			WARN(1,
+			     "Operation requires CAP_SETUID, which is not available to UID %u for operations besides approved set*uid transitions\n",
+			     __kuid_val(cred->uid));
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * This hook inspects the string pointed to by the first parameter, looking for
+ * the "nosymfollow" mount option. The second parameter points to an empty
+ * page-sized buffer that is used for holding LSM-specific mount options that
+ * are grabbed (after this function executes, in security_sb_copy_data) from
+ * the mount string in the first parameter. Since the chromiumos LSM is stacked
+ * ahead of SELinux for ChromeOS, the page-sized buffer is empty when this
+ * function is called. If the "nosymfollow" mount option is encountered in this
+ * function, we write "nosymflw" to the empty page-sized buffer which lets us
+ * transmit information which will be visible in chromiumos_sb_kern_mount
+ * signifying that symlinks should be disabled for the sb. We store this token
+ * at a spot in the buffer that is at a greater offset than the bytes needed to
+ * record the rest of the LSM-specific mount options (e.g. those for SELinux).
+ * The "nosymfollow" option will be stripped from the mount string if it is
+ * encountered.
+ */
+int chromiumos_sb_copy_data(char *orig, char *copy)
+{
+	char *orig_copy;
+	char *orig_copy_cur;
+	char *option;
+	size_t offset = 0;
+	bool found = false;
+
+	if (!orig || *orig == 0)
+		return 0;
+
+	orig_copy = alloc_secdata();
+	if (!orig_copy)
+		return -ENOMEM;
+	strncpy(orig_copy, orig, PAGE_SIZE);
+
+	memset(orig, 0, strlen(orig));
+
+	orig_copy_cur = orig_copy;
+	while (orig_copy_cur) {
+		option = strsep(&orig_copy_cur, ",");
+		if (strcmp(option, "nosymfollow") == 0) {
+			if (found) { /* Found multiple times. */
+				free_secdata(orig_copy);
+				return -EINVAL;
+			}
+			found = true;
+		} else {
+			if (offset > 0) {
+				orig[offset] = ',';
+				offset++;
+			}
+			strcpy(orig + offset, option);
+			offset += strlen(option);
+		}
+	}
+
+	if (found)
+		strcpy(copy + offset + 1, "nosymflw");
+
+	free_secdata(orig_copy);
+	return 0;
+}
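A minimal userspace sketch of how the option described above might be passed in practice; the device, mount point, and filesystem type are hypothetical. The LSM strips "nosymfollow" from the data string, so the filesystem only sees the remaining options:

/* Illustration only: device, mount point and fs type are placeholders. */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/*
	 * "nosymfollow" is consumed by the LSM (see chromiumos_sb_copy_data
	 * above); ext4 itself only ever parses "noatime" from this string.
	 */
	if (mount("/dev/loop0", "/mnt/test", "ext4",
		  MS_NODEV | MS_NOSUID | MS_NOEXEC, "nosymfollow,noatime"))
		perror("mount");
	return 0;
}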
+
+/*
+ * Emit a warning when no entry found in whitelist. These will show up in
+ * kernel warning reports collected by the crash reporter, so we have some
+ * insight regarding failures that need addressing.
+ */
+void chromiumos_setuid_policy_warning(kuid_t parent, kuid_t child)
+{
+	WARN(1,
+	     "UID %u is restricted to using certain whitelisted UIDs for process management, and %u is not in the whitelist.\n",
+	     __kuid_val(parent),
+	     __kuid_val(child));
+}
+
+int chromiumos_check_uid_transition(kuid_t parent, kuid_t child)
+{
+	if (chromiumos_check_setuid_policy_hashtable_key_value(parent, child))
+		return 0;
+	chromiumos_setuid_policy_warning(parent, child);
+	return -1;
+}
+
+/*
+ * Check whether there is either an exception for user under old cred struct to
+ * use user under new cred struct, or the UID transition is allowed (by Linux
+ * set*uid rules) even without CAP_SETUID.
+ */
+int chromiumos_security_task_fix_setuid(struct cred *new,
+					const struct cred *old, int flags)
+{
+	/*
+	 * Do nothing if the feature is disabled via kernel command line or there
+	 * are no setuid restrictions for this UID.
+	 */
+	if (disable_process_management_policies ||
+	    !chromiumos_check_setuid_policy_hashtable_key(old->uid))
+		return 0;
+
+	switch (flags) {
+	case LSM_SETID_RE:
+		/*
+		 * Users for which setuid restrictions exist can only set the
+		 * real UID to the real UID or the effective UID, unless an
+		 * explicit whitelist policy allows the transition.
+		 */
+		if (!uid_eq(old->uid, new->uid) &&
+			!uid_eq(old->euid, new->uid)) {
+			return chromiumos_check_uid_transition(old->uid,
+								new->uid);
+		}
+		/*
+		 * Users for which setuid restrictions exist can only set the
+		 * effective UID to the real UID, the effective UID, or the
+		 * saved set-UID, unless an explicit whitelist policy allows
+		 * the transition.
+		 */
+		if (!uid_eq(old->uid, new->euid) &&
+			!uid_eq(old->euid, new->euid) &&
+			!uid_eq(old->suid, new->euid)) {
+			return chromiumos_check_uid_transition(old->euid,
+								new->euid);
+		}
+		break;
+	case LSM_SETID_ID:
+		/*
+		 * Users for which setuid restrictions exist cannot change the
+		 * real UID or saved set-UID unless an explicit whitelist
+		 * policy allows the transition.
+		 */
+		if (!uid_eq(old->uid, new->uid)) {
+			return chromiumos_check_uid_transition(old->uid,
+								new->uid);
+		}
+		if (!uid_eq(old->suid, new->suid)) {
+			return chromiumos_check_uid_transition(old->suid,
+								new->suid);
+		}
+		break;
+	case LSM_SETID_RES:
+		/*
+		 * Users for which setuid restrictions exist cannot change the
+		 * real UID, effective UID, or saved set-UID to anything but
+		 * one of: the current real UID, the current effective UID or
+		 * the current saved set-user-ID unless an explicit whitelist
+		 * policy allows the transition.
+		 */
+		if (!uid_eq(new->uid, old->uid) &&
+			!uid_eq(new->uid, old->euid) &&
+			!uid_eq(new->uid, old->suid)) {
+			return chromiumos_check_uid_transition(old->uid,
+								new->uid);
+		}
+		if (!uid_eq(new->euid, old->uid) &&
+			!uid_eq(new->euid, old->euid) &&
+			!uid_eq(new->euid, old->suid)) {
+			return chromiumos_check_uid_transition(old->euid,
+								new->euid);
+		}
+		if (!uid_eq(new->suid, old->uid) &&
+			!uid_eq(new->suid, old->euid) &&
+			!uid_eq(new->suid, old->suid)) {
+			return chromiumos_check_uid_transition(old->suid,
+								new->suid);
+		}
+		break;
+	case LSM_SETID_FS:
+		/*
+		 * Users for which setuid restrictions exist cannot change the
+		 * filesystem UID to anything but one of: the current real UID,
+		 * the current effective UID or the current saved set-UID
+		 * unless an explicit whitelist policy allows the transition.
+		 */
+		if (!uid_eq(new->fsuid, old->uid)  &&
+			!uid_eq(new->fsuid, old->euid)  &&
+			!uid_eq(new->fsuid, old->suid) &&
+			!uid_eq(new->fsuid, old->fsuid)) {
+			return chromiumos_check_uid_transition(old->fsuid,
+								new->fsuid);
+		}
+		break;
+	}
+	return 0;
+}
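To make the LSM_SETID_* rules above concrete, here is a hedged userspace sketch: a process that holds CAP_SETUID but whose UID is subject to these restrictions can only complete the setresuid() call below if a matching whitelist entry exists (the UIDs 20104 and 20105 are placeholders):

/* Illustration only: UIDs are placeholders for a whitelisted transition. */
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Requires a "20104:20105" policy when running as restricted UID 20104. */
	if (setresuid(20105, 20105, 20105)) {
		perror("setresuid");	/* denied if the transition is not whitelisted */
		return 1;
	}
	printf("now running as uid %d\n", (int)getuid());
	return 0;
}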
+
+/* Add process management policy to hash table */
+int chromiumos_add_process_management_entry(kuid_t parent, kuid_t child)
+{
+	struct entry *new;
+
+	/* Return if entry already exists */
+	if (chromiumos_check_setuid_policy_hashtable_key_value(parent,
+							       child))
+		return 0;
+
+	new = kzalloc(sizeof(struct entry), GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+	new->parent_kuid = __kuid_val(parent);
+	new->child_kuid = __kuid_val(child);
+	spin_lock(&process_setuid_policy_hashtable_spinlock);
+	hash_add_rcu(process_setuid_policy_hashtable,
+		     &new->next,
+		     __kuid_val(parent));
+	spin_unlock(&process_setuid_policy_hashtable_spinlock);
+	return 0;
+}
+
+void chromiumos_flush_process_management_entries(void)
+{
+	struct entry *entry;
+	struct hlist_node *hlist_node;
+	unsigned int bkt_loop_cursor;
+	HLIST_HEAD(free_list);
+
+	/*
+	 * Could probably use hash_for_each_rcu here instead, but this should
+	 * be fine as well.
+	 */
+	spin_lock(&process_setuid_policy_hashtable_spinlock);
+	hash_for_each_safe(process_setuid_policy_hashtable, bkt_loop_cursor,
+			   hlist_node, entry, next) {
+		hash_del_rcu(&entry->next);
+		hlist_add_head(&entry->dlist, &free_list);
+	}
+	spin_unlock(&process_setuid_policy_hashtable_spinlock);
+	synchronize_rcu();
+	hlist_for_each_entry_safe(entry, hlist_node, &free_list, dlist) {
+		hlist_del(&entry->dlist);
+		kfree(entry);
+	}
+}
+
+/* Unfortunately the kernel doesn't implement a memmem() function. */
+static void *search_buffer(void *haystack, size_t haystacklen,
+			   const void *needle, size_t needlelen)
+{
+	if (!needlelen)
+		return (void *)haystack;
+	while (haystacklen >= needlelen) {
+		haystacklen--;
+		if (!memcmp(haystack, needle, needlelen))
+			return (void *)haystack;
+		haystack++;
+	}
+	return NULL;
+}
+
+int chromiumos_sb_kern_mount(struct super_block *sb, int flags, void *data)
+{
+	int ret;
+	char search_str[10] = "\0nosymflw";
+
+	if (!data)
+		return 0;
+
+	if (search_buffer(data, PAGE_SIZE, search_str, 10)) {
+		ret = chromiumos_add_sb_nosymfollow_hashtable(sb);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int __init chromiumos_security_init(void)
 {
 	pr_info("enabled");
diff --git a/security/chromiumos/process_management.h b/security/chromiumos/process_management.h
new file mode 100644
index 0000000..85538fc
--- /dev/null
+++ b/security/chromiumos/process_management.h
@@ -0,0 +1,38 @@
+/*
+ * Linux Security Module for Chromium OS
+ *
+ * Copyright 2018 Google LLC. All Rights Reserved
+ *
+ * Author:
+ *      Micah Morton       <mortonm@chromium.org>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _SECURITY_PROCESS_MANAGEMENT_H
+#define _SECURITY_PROCESS_MANAGEMENT_H
+
+#include <linux/types.h>
+
+/* Function type. */
+enum chromiumos_process_management_file_write_type {
+	CHROMIUMOS_PROCESS_MANAGEMENT_ADD, /* Add whitelist policy. */
+	CHROMIUMOS_PROCESS_MANAGEMENT_FLUSH, /* Flush whitelist policies. */
+};
+
+/*
+ * Add entry to chromiumos process management policies to allow user 'parent'
+ * to use user 'child' for process management.
+ */
+int chromiumos_add_process_management_entry(kuid_t parent, kuid_t child);
+
+void chromiumos_flush_process_management_entries(void);
+
+#endif /* _SECURITY_PROCESS_MANAGEMENT_H */
diff --git a/security/chromiumos/read_write_test_whitelists.h b/security/chromiumos/read_write_test_whitelists.h
new file mode 100644
index 0000000..5aa7370
--- /dev/null
+++ b/security/chromiumos/read_write_test_whitelists.h
@@ -0,0 +1,56 @@
+/*
+ * Linux Security Module for Chromium OS
+ *
+ * Copyright 2018 Google LLC. All Rights Reserved
+ *
+ * Authors:
+ *      Micah Morton <mortonm@chromium.org>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef READ_WRITE_TESTS_WHITELISTS_H
+#define READ_WRITE_TESTS_WHITELISTS_H
+
+/*
+ * NOTE: the purpose of this header is only to pull out the definition of this
+ * array from alt-syscall.c for the purposes of readability. It should not be
+ * included in other .c files.
+ */
+
+#include "alt-syscall.h"
+
+static struct syscall_whitelist_entry read_write_test_whitelist[] = {
+	SYSCALL_ENTRY(exit),
+	SYSCALL_ENTRY(openat),
+	SYSCALL_ENTRY(close),
+	SYSCALL_ENTRY(read),
+	SYSCALL_ENTRY(write),
+	SYSCALL_ENTRY_ALT(prctl, alt_sys_prctl),
+
+	/* open(2) is deprecated and not wired up on ARM64. */
+#ifndef CONFIG_ARM64
+	SYSCALL_ENTRY(open),
+#endif
+}; /* end read_write_test_whitelist */
+
+#ifdef CONFIG_COMPAT
+static struct syscall_whitelist_entry read_write_test_compat_whitelist[] = {
+	COMPAT_SYSCALL_ENTRY(exit),
+	COMPAT_SYSCALL_ENTRY(open),
+	COMPAT_SYSCALL_ENTRY(openat),
+	COMPAT_SYSCALL_ENTRY(close),
+	COMPAT_SYSCALL_ENTRY(read),
+	COMPAT_SYSCALL_ENTRY(write),
+	COMPAT_SYSCALL_ENTRY_ALT(prctl, alt_sys_prctl),
+}; /* end read_write_test_compat_whitelist */
+#endif /* CONFIG_COMPAT */
+
+#endif /* READ_WRITE_TESTS_WHITELISTS_H */
diff --git a/security/chromiumos/securityfs.c b/security/chromiumos/securityfs.c
index 4bd566f..39a6e78 100644
--- a/security/chromiumos/securityfs.c
+++ b/security/chromiumos/securityfs.c
@@ -20,14 +20,17 @@
 #include <linux/dcache.h>
 #include <linux/fs.h>
 #include <linux/namei.h>
+#include <linux/sched.h>
 #include <linux/security.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
 
 #include "inode_mark.h"
+#include "process_management.h"
 
 static struct dentry *chromiumos_dir;
 static struct dentry *chromiumos_inode_policy_dir;
+static struct dentry *chromiumos_process_management_policy_dir;
 
 struct chromiumos_inode_policy_file_entry {
 	const char *name;
@@ -38,6 +41,12 @@
 	struct dentry *dentry;
 };
 
+struct chromiumos_process_management_file_entry {
+	const char *name;
+	enum chromiumos_process_management_file_write_type type;
+	struct dentry *dentry;
+};
+
 static int chromiumos_inode_policy_file_write(
 	struct chromiumos_inode_policy_file_entry *file_entry,
 	struct dentry *dentry)
@@ -87,6 +96,14 @@
 	 .handle_write = &chromiumos_inode_policy_file_flush_write},
 };
 
+static struct chromiumos_process_management_file_entry
+		chromiumos_process_management_files[] = {
+	{.name = "add_whitelist_policy",
+	 .type = CHROMIUMOS_PROCESS_MANAGEMENT_ADD},
+	{.name = "flush_whitelist_policies",
+	 .type = CHROMIUMOS_PROCESS_MANAGEMENT_FLUSH},
+};
+
 static int chromiumos_resolve_path(const char __user *buf, size_t len,
 				   struct path *path)
 {
@@ -161,7 +178,7 @@
 	struct path path = {};
 	int ret;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(current_cred()->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	if (*ppos != 0)
@@ -176,10 +193,118 @@
 	return ret < 0 ? ret : len;
 }
 
+/*
+ * If the input buffer contains one or more invalid UIDs, the kuid_t
+ * variables pointed to by 'parent' and 'child' may still get updated, but
+ * this function will return an error.
+ */
+static int chromiumos_parse_process_management_policy(const char __user *buf,
+						      size_t len,
+						      kuid_t *parent,
+						      kuid_t *child)
+{
+	char *kern_buf;
+	char *parent_buf;
+	char *child_buf;
+	const char separator[] = ":";
+	int ret;
+	size_t first_substring_length;
+	long parsed_parent;
+	long parsed_child;
+
+	/* Duplicate string from user memory and NULL-terminate */
+	kern_buf = memdup_user_nul(buf, len);
+	if (IS_ERR(kern_buf))
+		return PTR_ERR(kern_buf);
+
+	/*
+	 * Format of |buf| string should be <UID>:<UID>.
+	 * Find location of ":" in kern_buf (copied from |buf|).
+	 */
+	first_substring_length = strcspn(kern_buf, separator);
+	if (first_substring_length == 0 || first_substring_length == len) {
+		ret = -EINVAL;
+		goto free_kern;
+	}
+
+	parent_buf = kmemdup_nul(kern_buf, first_substring_length, GFP_KERNEL);
+	if (!parent_buf) {
+		ret = -ENOMEM;
+		goto free_kern;
+	}
+
+	ret = kstrtol(parent_buf, 0, &parsed_parent);
+	if (ret)
+		goto free_both;
+
+	child_buf = kern_buf + first_substring_length + 1;
+	ret = kstrtol(child_buf, 0, &parsed_child);
+	if (ret)
+		goto free_both;
+
+	*parent = make_kuid(current_user_ns(), parsed_parent);
+	if (!uid_valid(*parent)) {
+		ret = -EINVAL;
+		goto free_both;
+	}
+
+	*child = make_kuid(current_user_ns(), parsed_child);
+	if (!uid_valid(*child)) {
+		ret = -EINVAL;
+		goto free_both;
+	}
+
+free_both:
+	kfree(parent_buf);
+free_kern:
+	kfree(kern_buf);
+	return ret;
+}
+
+static ssize_t chromiumos_process_management_file_write(struct file *file,
+							const char __user *buf,
+							size_t len,
+							loff_t *ppos)
+{
+	struct chromiumos_process_management_file_entry *file_entry =
+		file->f_inode->i_private;
+	kuid_t parent;
+	kuid_t child;
+	int ret;
+
+	if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (*ppos != 0)
+		return -EINVAL;
+
+	if (file_entry->type == CHROMIUMOS_PROCESS_MANAGEMENT_FLUSH) {
+		chromiumos_flush_process_management_entries();
+		return len;
+	}
+
+	/* file_entry->type must equal CHROMIUMOS_PROCESS_MANAGEMENT_ADD */
+	ret = chromiumos_parse_process_management_policy(buf, len, &parent,
+							 &child);
+	if (ret)
+		return ret;
+
+	ret = chromiumos_add_process_management_entry(parent, child);
+	if (ret)
+		return ret;
+
+	/* Return len on success so caller won't keep trying to write */
+	return len;
+}
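For completeness, a minimal sketch of installing a policy through the interface above, assuming securityfs is mounted at the conventional /sys/kernel/security path (the UIDs are placeholders):

/* Illustration only: securityfs mount point and UIDs are assumptions. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/kernel/security/chromiumos/"
			   "process_management_policies/add_whitelist_policy";
	const char *policy = "20104:20105";	/* <parent UID>:<child UID> */
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* The handler expects the whole policy in a single write at offset 0. */
	if (write(fd, policy, strlen(policy)) < 0)
		perror("write");
	close(fd);
	return 0;
}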
+
 static const struct file_operations chromiumos_inode_policy_file_fops = {
 	.write = chromiumos_inode_file_write,
 };
 
+static const struct file_operations chromiumos_process_management_file_fops = {
+	.write = chromiumos_process_management_file_write,
+};
+
 static void chromiumos_shutdown_securityfs(void)
 {
 	int i;
@@ -191,9 +316,19 @@
 		entry->dentry = NULL;
 	}
 
+	for (i = 0; i < ARRAY_SIZE(chromiumos_process_management_files); ++i) {
+		struct chromiumos_process_management_file_entry *entry =
+			&chromiumos_process_management_files[i];
+		securityfs_remove(entry->dentry);
+		entry->dentry = NULL;
+	}
+
 	securityfs_remove(chromiumos_inode_policy_dir);
 	chromiumos_inode_policy_dir = NULL;
 
+	securityfs_remove(chromiumos_process_management_policy_dir);
+	chromiumos_process_management_policy_dir = NULL;
+
 	securityfs_remove(chromiumos_dir);
 	chromiumos_dir = NULL;
 }
@@ -230,6 +365,29 @@
 		}
 	}
 
+	chromiumos_process_management_policy_dir =
+		securityfs_create_dir(
+			"process_management_policies",
+			chromiumos_dir);
+	if (IS_ERR(chromiumos_process_management_policy_dir)) {
+		ret = PTR_ERR(chromiumos_process_management_policy_dir);
+		goto error;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(chromiumos_process_management_files); ++i) {
+		struct chromiumos_process_management_file_entry *entry =
+			&chromiumos_process_management_files[i];
+		entry->dentry = securityfs_create_file(
+			entry->name,
+			0200,
+			chromiumos_process_management_policy_dir,
+			entry, &chromiumos_process_management_file_fops);
+		if (IS_ERR(entry->dentry)) {
+			ret = PTR_ERR(entry->dentry);
+			goto error;
+		}
+	}
+
 	return 0;
 
 error:
diff --git a/security/chromiumos/third_party_whitelists.h b/security/chromiumos/third_party_whitelists.h
new file mode 100644
index 0000000..084e60e
--- /dev/null
+++ b/security/chromiumos/third_party_whitelists.h
@@ -0,0 +1,263 @@
+/*
+ * Linux Security Module for Chromium OS
+ *
+ * Copyright 2018 Google LLC. All Rights Reserved
+ *
+ * Authors:
+ *      Micah Morton <mortonm@chromium.org>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef THIRD_PARTY_WHITELISTS_H
+#define THIRD_PARTY_WHITELISTS_H
+
+/*
+ * NOTE: the purpose of this header is only to pull out the definition of this
+ * array from alt-syscall.c for the purposes of readability. It should not be
+ * included in other .c files.
+ */
+
+#include "alt-syscall.h"
+
+static struct syscall_whitelist_entry third_party_whitelist[] = {
+	SYSCALL_ENTRY(brk),
+	SYSCALL_ENTRY(chdir),
+	SYSCALL_ENTRY(clock_gettime),
+	SYSCALL_ENTRY(clone),
+	SYSCALL_ENTRY(close),
+	SYSCALL_ENTRY(dup),
+	SYSCALL_ENTRY(execve),
+	SYSCALL_ENTRY(exit),
+	SYSCALL_ENTRY(exit_group),
+	SYSCALL_ENTRY(fcntl),
+	SYSCALL_ENTRY(fstat),
+	SYSCALL_ENTRY(futex),
+	SYSCALL_ENTRY(getcwd),
+	SYSCALL_ENTRY(getdents64),
+	SYSCALL_ENTRY(getpid),
+	SYSCALL_ENTRY(getpgid),
+	SYSCALL_ENTRY(getppid),
+	SYSCALL_ENTRY(getpriority),
+	SYSCALL_ENTRY(getrlimit),
+	SYSCALL_ENTRY(getsid),
+	SYSCALL_ENTRY(gettimeofday),
+	SYSCALL_ENTRY(ioctl),
+	SYSCALL_ENTRY(lseek),
+	SYSCALL_ENTRY(madvise),
+	SYSCALL_ENTRY(mprotect),
+	SYSCALL_ENTRY(munmap),
+	SYSCALL_ENTRY(nanosleep),
+	SYSCALL_ENTRY(openat),
+	SYSCALL_ENTRY(prlimit64),
+	SYSCALL_ENTRY(read),
+	SYSCALL_ENTRY(rt_sigaction),
+	SYSCALL_ENTRY(rt_sigprocmask),
+	SYSCALL_ENTRY(rt_sigreturn),
+	SYSCALL_ENTRY(sendfile),
+	SYSCALL_ENTRY(set_robust_list),
+	SYSCALL_ENTRY(set_tid_address),
+	SYSCALL_ENTRY(setpgid),
+	SYSCALL_ENTRY(setpriority),
+	SYSCALL_ENTRY(setsid),
+	SYSCALL_ENTRY(syslog),
+	SYSCALL_ENTRY(statfs),
+	SYSCALL_ENTRY(umask),
+	SYSCALL_ENTRY(uname),
+	SYSCALL_ENTRY(wait4),
+	SYSCALL_ENTRY(write),
+	SYSCALL_ENTRY(writev),
+
+	/*
+	 * Deprecated syscalls which are not wired up on new architectures
+	 * such as ARM64.
+	 */
+#ifndef CONFIG_ARM64
+	SYSCALL_ENTRY(access),
+	SYSCALL_ENTRY(creat),
+	SYSCALL_ENTRY(dup2),
+	SYSCALL_ENTRY(getdents),
+	SYSCALL_ENTRY(getpgrp),
+	SYSCALL_ENTRY(lstat),
+	SYSCALL_ENTRY(mkdir),
+	SYSCALL_ENTRY(open),
+	SYSCALL_ENTRY(pipe),
+	SYSCALL_ENTRY(poll),
+	SYSCALL_ENTRY(readlink),
+	SYSCALL_ENTRY(stat),
+	SYSCALL_ENTRY(unlink),
+#endif
+
+	/* 32-bit only syscalls. */
+#if defined(CONFIG_ARM) || defined(CONFIG_X86_32)
+	SYSCALL_ENTRY(fcntl64),
+	SYSCALL_ENTRY(fstat64),
+	SYSCALL_ENTRY(geteuid32),
+	SYSCALL_ENTRY(getuid32),
+	SYSCALL_ENTRY(_llseek),
+	SYSCALL_ENTRY(lstat64),
+	SYSCALL_ENTRY(_newselect),
+	SYSCALL_ENTRY(mmap2),
+	SYSCALL_ENTRY(stat64),
+	SYSCALL_ENTRY(ugetrlimit),
+#endif
+
+	/* IA32 uses the common socketcall(2) entrypoint for socket calls. */
+#ifdef CONFIG_X86_32
+	SYSCALL_ENTRY(socketcall),
+#else
+	SYSCALL_ENTRY(accept),
+	SYSCALL_ENTRY(bind),
+	SYSCALL_ENTRY(connect),
+	SYSCALL_ENTRY(listen),
+	SYSCALL_ENTRY(recvfrom),
+	SYSCALL_ENTRY(recvmsg),
+	SYSCALL_ENTRY(sendmsg),
+	SYSCALL_ENTRY(sendto),
+	SYSCALL_ENTRY(setsockopt),
+	SYSCALL_ENTRY(socket),
+	SYSCALL_ENTRY(socketpair),
+#endif
+
+	/* 64-bit only syscalls. */
+#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
+	SYSCALL_ENTRY(getegid),
+	SYSCALL_ENTRY(geteuid),
+	SYSCALL_ENTRY(getgid),
+	SYSCALL_ENTRY(getuid),
+	SYSCALL_ENTRY(mmap),
+	SYSCALL_ENTRY(setgid),
+	SYSCALL_ENTRY(setuid),
+	/*
+	 * select(2) is deprecated and not wired up
+	 * on ARM64.
+	 */
+#ifndef CONFIG_ARM64
+	SYSCALL_ENTRY(select),
+#endif
+#endif
+
+	/* X86-specific syscalls. */
+#ifdef CONFIG_X86
+	SYSCALL_ENTRY(arch_prctl),
+#endif
+}; /* end third_party_whitelist */
+
+#ifdef CONFIG_COMPAT
+static struct syscall_whitelist_entry third_party_compat_whitelist[] = {
+	COMPAT_SYSCALL_ENTRY(access),
+	COMPAT_SYSCALL_ENTRY(brk),
+	COMPAT_SYSCALL_ENTRY(chdir),
+	COMPAT_SYSCALL_ENTRY(clock_gettime),
+	COMPAT_SYSCALL_ENTRY(clone),
+	COMPAT_SYSCALL_ENTRY(close),
+	COMPAT_SYSCALL_ENTRY(creat),
+	COMPAT_SYSCALL_ENTRY(dup),
+	COMPAT_SYSCALL_ENTRY(dup2),
+	COMPAT_SYSCALL_ENTRY(execve),
+	COMPAT_SYSCALL_ENTRY(exit),
+	COMPAT_SYSCALL_ENTRY(exit_group),
+	COMPAT_SYSCALL_ENTRY(fcntl),
+	COMPAT_SYSCALL_ENTRY(fcntl64),
+	COMPAT_SYSCALL_ENTRY(fstat),
+	COMPAT_SYSCALL_ENTRY(fstat64),
+	COMPAT_SYSCALL_ENTRY(futex),
+	COMPAT_SYSCALL_ENTRY(getcwd),
+	COMPAT_SYSCALL_ENTRY(getdents),
+	COMPAT_SYSCALL_ENTRY(getdents64),
+	COMPAT_SYSCALL_ENTRY(getegid),
+	COMPAT_SYSCALL_ENTRY(geteuid),
+	COMPAT_SYSCALL_ENTRY(geteuid32),
+	COMPAT_SYSCALL_ENTRY(getgid),
+	COMPAT_SYSCALL_ENTRY(getpgid),
+	COMPAT_SYSCALL_ENTRY(getpgrp),
+	COMPAT_SYSCALL_ENTRY(getpid),
+	COMPAT_SYSCALL_ENTRY(getpriority),
+	COMPAT_SYSCALL_ENTRY(getppid),
+	COMPAT_SYSCALL_ENTRY(getsid),
+	COMPAT_SYSCALL_ENTRY(gettimeofday),
+	COMPAT_SYSCALL_ENTRY(getuid),
+	COMPAT_SYSCALL_ENTRY(getuid32),
+	COMPAT_SYSCALL_ENTRY(ioctl),
+	COMPAT_SYSCALL_ENTRY(_llseek),
+	COMPAT_SYSCALL_ENTRY(lseek),
+	COMPAT_SYSCALL_ENTRY(lstat),
+	COMPAT_SYSCALL_ENTRY(lstat64),
+	COMPAT_SYSCALL_ENTRY(madvise),
+	COMPAT_SYSCALL_ENTRY(mkdir),
+	COMPAT_SYSCALL_ENTRY(mmap2),
+	COMPAT_SYSCALL_ENTRY(mprotect),
+	COMPAT_SYSCALL_ENTRY(munmap),
+	COMPAT_SYSCALL_ENTRY(nanosleep),
+	COMPAT_SYSCALL_ENTRY(_newselect),
+	COMPAT_SYSCALL_ENTRY(open),
+	COMPAT_SYSCALL_ENTRY(openat),
+	COMPAT_SYSCALL_ENTRY(pipe),
+	COMPAT_SYSCALL_ENTRY(poll),
+	COMPAT_SYSCALL_ENTRY(prlimit64),
+	COMPAT_SYSCALL_ENTRY(read),
+	COMPAT_SYSCALL_ENTRY(readlink),
+	COMPAT_SYSCALL_ENTRY(rt_sigaction),
+	COMPAT_SYSCALL_ENTRY(rt_sigprocmask),
+	COMPAT_SYSCALL_ENTRY(rt_sigreturn),
+	COMPAT_SYSCALL_ENTRY(sendfile),
+	COMPAT_SYSCALL_ENTRY(set_robust_list),
+	COMPAT_SYSCALL_ENTRY(set_tid_address),
+	COMPAT_SYSCALL_ENTRY(setgid32),
+	COMPAT_SYSCALL_ENTRY(setuid32),
+	COMPAT_SYSCALL_ENTRY(setpgid),
+	COMPAT_SYSCALL_ENTRY(setpriority),
+	COMPAT_SYSCALL_ENTRY(setsid),
+	COMPAT_SYSCALL_ENTRY(stat),
+	COMPAT_SYSCALL_ENTRY(stat64),
+	COMPAT_SYSCALL_ENTRY(statfs),
+	COMPAT_SYSCALL_ENTRY(syslog),
+	COMPAT_SYSCALL_ENTRY(ugetrlimit),
+	COMPAT_SYSCALL_ENTRY(umask),
+	COMPAT_SYSCALL_ENTRY(uname),
+	COMPAT_SYSCALL_ENTRY(unlink),
+	COMPAT_SYSCALL_ENTRY(wait4),
+	COMPAT_SYSCALL_ENTRY(write),
+	COMPAT_SYSCALL_ENTRY(writev),
+
+	/* IA32 uses the common socketcall(2) entrypoint for socket calls. */
+#ifdef CONFIG_X86
+	COMPAT_SYSCALL_ENTRY(socketcall),
+#else
+	COMPAT_SYSCALL_ENTRY(accept),
+	COMPAT_SYSCALL_ENTRY(bind),
+	COMPAT_SYSCALL_ENTRY(connect),
+	COMPAT_SYSCALL_ENTRY(listen),
+	COMPAT_SYSCALL_ENTRY(recvfrom),
+	COMPAT_SYSCALL_ENTRY(recvmsg),
+	COMPAT_SYSCALL_ENTRY(sendmsg),
+	COMPAT_SYSCALL_ENTRY(sendto),
+	COMPAT_SYSCALL_ENTRY(setsockopt),
+	COMPAT_SYSCALL_ENTRY(socket),
+	COMPAT_SYSCALL_ENTRY(socketpair),
+#endif
+
+	/*
+	 * getrlimit(2) is deprecated and not wired in the ARM compat table
+	 * on ARM64.
+	 */
+#ifndef CONFIG_ARM64
+	COMPAT_SYSCALL_ENTRY(getrlimit),
+#endif
+
+	/* X86-specific syscalls. */
+#ifdef CONFIG_X86
+	COMPAT_SYSCALL_ENTRY(arch_prctl),
+#endif
+}; /* end third_party_compat_whitelist */
+#endif /* CONFIG_COMPAT */
+
+#endif /* THIRD_PARTY_WHITELISTS_H */
diff --git a/security/security.c b/security/security.c
index a8dc501..4d5fd26 100644
--- a/security/security.c
+++ b/security/security.c
@@ -197,6 +197,11 @@
 int security_capable(const struct cred *cred, struct user_namespace *ns,
 		     int cap)
 {
+	int ret = chromiumos_security_capable(cred, ns, cap);
+
+	if (ret)
+		return ret;
+
 	return security_ops->capable(cred, ns, cap, SECURITY_CAP_AUDIT);
 }
 
@@ -273,6 +278,12 @@
 
 int security_sb_copy_data(char *orig, char *copy)
 {
+	int ret;
+
+	ret = chromiumos_sb_copy_data(orig, copy);
+	if (ret)
+		return ret;
+
 	return security_ops->sb_copy_data(orig, copy);
 }
 EXPORT_SYMBOL(security_sb_copy_data);
@@ -284,6 +295,12 @@
 
 int security_sb_kern_mount(struct super_block *sb, int flags, void *data)
 {
+	int ret;
+
+	ret = chromiumos_sb_kern_mount(sb, flags, data);
+	if (ret)
+		return ret;
+
 	return security_ops->sb_kern_mount(sb, flags, data);
 }
 
@@ -904,6 +921,11 @@
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags)
 {
+	int ret = chromiumos_security_task_fix_setuid(new, old, flags);
+
+	if (ret)
+		return ret;
+
 	return security_ops->task_fix_setuid(new, old, flags);
 }
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index e45ef57..a9d95ad 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5647,7 +5647,7 @@
 		return error;
 
 	/* Obtain a SID for the context, if one was specified. */
-	if (size && str[1] && str[1] != '\n') {
+	if (size && str[0] && str[0] != '\n') {
 		if (str[size-1] == '\n') {
 			str[size-1] = 0;
 			size--;
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index 7b596b57..417fc45 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -621,7 +621,7 @@
 int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream,
 			      struct snd_rawmidi_params * params)
 {
-	char *newbuf;
+	char *newbuf, *oldbuf;
 	struct snd_rawmidi_runtime *runtime = substream->runtime;
 	
 	if (substream->append && substream->use_count > 1)
@@ -634,13 +634,17 @@
 		return -EINVAL;
 	}
 	if (params->buffer_size != runtime->buffer_size) {
-		newbuf = krealloc(runtime->buffer, params->buffer_size,
-				  GFP_KERNEL);
+		newbuf = kmalloc(params->buffer_size, GFP_KERNEL);
 		if (!newbuf)
 			return -ENOMEM;
+		spin_lock_irq(&runtime->lock);
+		oldbuf = runtime->buffer;
 		runtime->buffer = newbuf;
 		runtime->buffer_size = params->buffer_size;
 		runtime->avail = runtime->buffer_size;
+		runtime->appl_ptr = runtime->hw_ptr = 0;
+		spin_unlock_irq(&runtime->lock);
+		kfree(oldbuf);
 	}
 	runtime->avail_min = params->avail_min;
 	substream->active_sensing = !params->no_active_sensing;
@@ -650,7 +654,7 @@
 int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream,
 			     struct snd_rawmidi_params * params)
 {
-	char *newbuf;
+	char *newbuf, *oldbuf;
 	struct snd_rawmidi_runtime *runtime = substream->runtime;
 
 	snd_rawmidi_drain_input(substream);
@@ -661,12 +665,16 @@
 		return -EINVAL;
 	}
 	if (params->buffer_size != runtime->buffer_size) {
-		newbuf = krealloc(runtime->buffer, params->buffer_size,
-				  GFP_KERNEL);
+		newbuf = kmalloc(params->buffer_size, GFP_KERNEL);
 		if (!newbuf)
 			return -ENOMEM;
+		spin_lock_irq(&runtime->lock);
+		oldbuf = runtime->buffer;
 		runtime->buffer = newbuf;
 		runtime->buffer_size = params->buffer_size;
+		runtime->appl_ptr = runtime->hw_ptr = 0;
+		spin_unlock_irq(&runtime->lock);
+		kfree(oldbuf);
 	}
 	runtime->avail_min = params->avail_min;
 	return 0;
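The rawmidi hunks above replace an in-place krealloc() with allocate-outside-the-lock, swap-under-the-lock, free-afterwards, so concurrent users never dereference a freed buffer. A rough userspace analog of that pattern, using a pthread mutex in place of the runtime spinlock (all names here are illustrative):

/* Illustration only: a userspace analog of the swap-under-lock resize. */
#include <pthread.h>
#include <stdlib.h>

struct ring {
	pthread_mutex_t lock;
	char *buffer;
	size_t size;
	size_t appl_ptr, hw_ptr;
};

static int ring_resize(struct ring *r, size_t new_size)
{
	char *newbuf, *oldbuf;

	newbuf = malloc(new_size);		/* allocate outside the lock */
	if (!newbuf)
		return -1;

	pthread_mutex_lock(&r->lock);
	oldbuf = r->buffer;			/* swap pointers under the lock */
	r->buffer = newbuf;
	r->size = new_size;
	r->appl_ptr = r->hw_ptr = 0;		/* old offsets are no longer valid */
	pthread_mutex_unlock(&r->lock);

	free(oldbuf);				/* free only after the swap */
	return 0;
}

int main(void)
{
	struct ring r = { .lock = PTHREAD_MUTEX_INITIALIZER };

	r.buffer = malloc(64);
	r.size = 64;
	ring_resize(&r, 256);
	free(r.buffer);
	return 0;
}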