swr: Rework scratch space allocation

Remove allocation of >= 2 KB buffers from per-draw context memory in
swr_copy_to_scratch_space() (which is used to copy small vertex/index buffers
and shader constants to a scratch space to be used by the upcoming draw).

Large shader constant allocations need to be done in the circular scratch
buffer instead of context memory, because their values persist across
render calls.

Also lower SWR_CLIENT_COPY_LIMIT from 32k to 8k, since larger buffers would
be too large for the circular scratch space.

Fixes render issues with CEI Ensight.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
diff --git a/src/gallium/drivers/swr/swr_scratch.cpp b/src/gallium/drivers/swr/swr_scratch.cpp
index 8afe73c..810132a 100644
--- a/src/gallium/drivers/swr/swr_scratch.cpp
+++ b/src/gallium/drivers/swr/swr_scratch.cpp
@@ -28,8 +28,6 @@
 #include "swr_fence_work.h"
 #include "api.h"
 
-#define SCRATCH_SINGLE_ALLOCATION_LIMIT 2048
-
 void *
 swr_copy_to_scratch_space(struct swr_context *ctx,
                           struct swr_scratch_space *space,
@@ -40,41 +38,36 @@
    assert(space);
    assert(size);
 
-   if (size >= SCRATCH_SINGLE_ALLOCATION_LIMIT) {
-      /* Use per draw SwrAllocDrawContextMemory for larger copies */
-      ptr = ctx->api.pfnSwrAllocDrawContextMemory(ctx->swrContext, size, 4);
-   } else {
-      /* Allocate enough so that MAX_DRAWS_IN_FLIGHT sets fit. */
-      uint32_t max_size_in_flight = size * ctx->max_draws_in_flight;
+   /* Allocate enough so that MAX_DRAWS_IN_FLIGHT sets fit. */
+   uint32_t max_size_in_flight = size * ctx->max_draws_in_flight;
 
-      /* Need to grow space */
-      if (max_size_in_flight > space->current_size) {
-         space->current_size = max_size_in_flight;
+   /* Need to grow space */
+   if (max_size_in_flight > space->current_size) {
+      space->current_size = max_size_in_flight;
 
-         if (space->base) {
-            /* defer delete, use aligned-free */
-            struct swr_screen *screen = swr_screen(ctx->pipe.screen);
-            swr_fence_work_free(screen->flush_fence, space->base, true);
-            space->base = NULL;
-         }
-
-         if (!space->base) {
-            space->base = (uint8_t *)AlignedMalloc(space->current_size, 
-                                                   sizeof(void *));
-            space->head = (void *)space->base;
-         }
+      if (space->base) {
+         /* defer delete, use aligned-free */
+         struct swr_screen *screen = swr_screen(ctx->pipe.screen);
+         swr_fence_work_free(screen->flush_fence, space->base, true);
+         space->base = NULL;
       }
 
-      /* Wrap */
-      if (((uint8_t *)space->head + size)
-          >= ((uint8_t *)space->base + space->current_size)) {
-         space->head = space->base;
+      if (!space->base) {
+         space->base = (uint8_t *)AlignedMalloc(space->current_size,
+                                                sizeof(void *));
+         space->head = (void *)space->base;
       }
-
-      ptr = space->head;
-      space->head = (uint8_t *)space->head + size;
    }
 
+   /* Wrap */
+   if (((uint8_t *)space->head + size)
+       >= ((uint8_t *)space->base + space->current_size)) {
+      space->head = space->base;
+   }
+
+   ptr = space->head;
+   space->head = (uint8_t *)space->head + size;
+
    /* Copy user_buffer to scratch */
    if (user_buffer)
       memcpy(ptr, user_buffer, size);
diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
index 46b3a00..b21c35e 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -57,7 +57,7 @@
 #define SWR_MAX_TEXTURE_ARRAY_LAYERS 512 /* 8K x 512 / 8K x 8K x 512 */
 
 /* Default max client_copy_limit */
-#define SWR_CLIENT_COPY_LIMIT 32768
+#define SWR_CLIENT_COPY_LIMIT 8192
 
 /* Flag indicates creation of alternate surface, to prevent recursive loop
  * in resource creation when msaa_force_enable is set. */