Merge "vp9: Write height and width to ivf header in SVC example encoder."
diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c
index a7d9536..0dc5a94 100644
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -788,19 +788,22 @@
 
   parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);
 
+  // Y4M reader handles its own allocation.
+  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
 // Allocate image buffer
 #if CONFIG_VP9_HIGHBITDEPTH
-  if (!vpx_img_alloc(&raw,
-                     enc_cfg.g_input_bit_depth == 8 ? VPX_IMG_FMT_I420
-                                                    : VPX_IMG_FMT_I42016,
-                     enc_cfg.g_w, enc_cfg.g_h, 32)) {
-    die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
-  }
+    if (!vpx_img_alloc(&raw,
+                       enc_cfg.g_input_bit_depth == 8 ? VPX_IMG_FMT_I420
+                                                      : VPX_IMG_FMT_I42016,
+                       enc_cfg.g_w, enc_cfg.g_h, 32)) {
+      die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
+    }
 #else
-  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) {
-    die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
-  }
+    if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) {
+      die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
+    }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
+  }
 
   // Initialize codec
   if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) !=
@@ -1129,7 +1132,9 @@
   printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
          frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
          1000000 * (double)frame_cnt / (double)cx_time);
-  vpx_img_free(&raw);
+  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
+    vpx_img_free(&raw);
+  }
   // display average size, psnr
   vpx_svc_dump_statistics(&svc_ctx);
   vpx_svc_release(&svc_ctx);
diff --git a/examples/vpx_temporal_svc_encoder.c b/examples/vpx_temporal_svc_encoder.c
index aa2213a..ba71ca7 100644
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -93,14 +93,15 @@
 // in the stream.
 static void set_rate_control_metrics(struct RateControlMetrics *rc,
                                      vpx_codec_enc_cfg_t *cfg) {
-  unsigned int i = 0;
+  int i = 0;
   // Set the layer (cumulative) framerate and the target layer (non-cumulative)
   // per-frame-bandwidth, for the rate control encoding stats below.
   const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
+  const int ts_number_layers = cfg->ts_number_layers;
   rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0];
   rc->layer_pfb[0] =
       1000.0 * rc->layer_target_bitrate[0] / rc->layer_framerate[0];
-  for (i = 0; i < cfg->ts_number_layers; ++i) {
+  for (i = 0; i < ts_number_layers; ++i) {
     if (i > 0) {
       rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
       rc->layer_pfb[i] =
@@ -119,6 +120,9 @@
   rc->window_size = 15;
   rc->avg_st_encoding_bitrate = 0.0;
   rc->variance_st_encoding_bitrate = 0.0;
+  // Target bandwidth for the whole stream.
+  // Set to layer_target_bitrate for highest layer (total bitrate).
+  cfg->rc_target_bitrate = rc->layer_target_bitrate[ts_number_layers - 1];
 }
 
 static void printout_rate_control_summary(struct RateControlMetrics *rc,
@@ -657,6 +661,9 @@
     die("Invalid number of arguments");
   }
 
+  input_ctx.filename = argv[1];
+  open_input_file(&input_ctx);
+
 #if CONFIG_VP9_HIGHBITDEPTH
   switch (strtol(argv[argc - 1], NULL, 0)) {
     case 8:
@@ -673,14 +680,22 @@
       break;
     default: die("Invalid bit depth (8, 10, 12) %s", argv[argc - 1]);
   }
-  if (!vpx_img_alloc(
-          &raw, bit_depth == VPX_BITS_8 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_I42016,
-          width, height, 32)) {
-    die("Failed to allocate image", width, height);
+
+  // Y4M reader has its own allocation.
+  if (input_ctx.file_type != FILE_TYPE_Y4M) {
+    if (!vpx_img_alloc(
+            &raw,
+            bit_depth == VPX_BITS_8 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_I42016,
+            width, height, 32)) {
+      die("Failed to allocate image %dx%d", width, height);
+    }
   }
 #else
-  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 32)) {
-    die("Failed to allocate image", width, height);
+  // Y4M reader has its own allocation.
+  if (input_ctx.file_type != FILE_TYPE_Y4M) {
+    if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 32)) {
+      die("Failed to allocate image %dx%d", width, height);
+    }
   }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
@@ -758,13 +773,6 @@
 
   set_rate_control_metrics(&rc, &cfg);
 
-  // Target bandwidth for the whole stream.
-  // Set to layer_target_bitrate for highest layer (total bitrate).
-  cfg.rc_target_bitrate = rc.layer_target_bitrate[cfg.ts_number_layers - 1];
-
-  input_ctx.filename = argv[1];
-  open_input_file(&input_ctx);
-
   if (input_ctx.file_type == FILE_TYPE_Y4M) {
     if (input_ctx.width != cfg.g_w || input_ctx.height != cfg.g_h) {
       die("Incorrect width or height: %d x %d", cfg.g_w, cfg.g_h);
@@ -962,7 +970,10 @@
   // Try to rewrite the output file headers with the actual frame count.
   for (i = 0; i < cfg.ts_number_layers; ++i) vpx_video_writer_close(outfile[i]);
 
-  vpx_img_free(&raw);
+  if (input_ctx.file_type != FILE_TYPE_Y4M) {
+    vpx_img_free(&raw);
+  }
+
 #if ROI_MAP
   free(roi.roi_map);
 #endif
diff --git a/vpx_dsp/fastssim.c b/vpx_dsp/fastssim.c
index 0469071a..6ab6f55 100644
--- a/vpx_dsp/fastssim.c
+++ b/vpx_dsp/fastssim.c
@@ -128,10 +128,12 @@
       int i1;
       i0 = 2 * i;
       i1 = FS_MINI(i0 + 1, w2);
-      dst1[j * w + i] = src1[j0offs + i0] + src1[j0offs + i1] +
-                        src1[j1offs + i0] + src1[j1offs + i1];
-      dst2[j * w + i] = src2[j0offs + i0] + src2[j0offs + i1] +
-                        src2[j1offs + i0] + src2[j1offs + i1];
+      dst1[j * w + i] =
+          (uint32_t)((int64_t)src1[j0offs + i0] + src1[j0offs + i1] +
+                     src1[j1offs + i0] + src1[j1offs + i1]);
+      dst2[j * w + i] =
+          (uint32_t)((int64_t)src2[j0offs + i0] + src2[j0offs + i1] +
+                     src2[j1offs + i0] + src2[j1offs + i1]);
     }
   }
 }
@@ -220,12 +222,12 @@
   ssim = _ctx->level[_l].ssim;
   c1 = (double)(ssim_c1 * 4096 * (1 << 4 * _l));
   for (j = 0; j < h; j++) {
-    unsigned mux;
-    unsigned muy;
+    int64_t mux;
+    int64_t muy;
     int i0;
     int i1;
-    mux = 5 * col_sums_x[0];
-    muy = 5 * col_sums_y[0];
+    mux = (int64_t)5 * col_sums_x[0];
+    muy = (int64_t)5 * col_sums_y[0];
     for (i = 1; i < 4; i++) {
       i1 = FS_MINI(i, w - 1);
       mux += col_sums_x[i1];
@@ -237,8 +239,8 @@
       if (i + 1 < w) {
         i0 = FS_MAXI(0, i - 4);
         i1 = FS_MINI(i + 4, w - 1);
-        mux += col_sums_x[i1] - col_sums_x[i0];
-        muy += col_sums_x[i1] - col_sums_x[i0];
+        mux += (int)col_sums_x[i1] - (int)col_sums_x[i0];
+        muy += (int)col_sums_y[i1] - (int)col_sums_y[i0];
       }
     }
     if (j + 1 < h) {
@@ -246,8 +248,10 @@
       for (i = 0; i < w; i++) col_sums_x[i] -= im1[j0offs + i];
       for (i = 0; i < w; i++) col_sums_y[i] -= im2[j0offs + i];
       j1offs = FS_MINI(j + 4, h - 1) * w;
-      for (i = 0; i < w; i++) col_sums_x[i] += im1[j1offs + i];
-      for (i = 0; i < w; i++) col_sums_y[i] += im2[j1offs + i];
+      for (i = 0; i < w; i++)
+        col_sums_x[i] = (uint32_t)((int64_t)col_sums_x[i] + im1[j1offs + i]);
+      for (i = 0; i < w; i++)
+        col_sums_y[i] = (uint32_t)((int64_t)col_sums_y[i] + im2[j1offs + i]);
     }
   }
 }
@@ -343,18 +347,18 @@
   for (j = 0; j < h + 4; j++) {
     if (j < h - 1) {
       for (i = 0; i < w - 1; i++) {
-        unsigned g1;
-        unsigned g2;
-        unsigned gx;
-        unsigned gy;
-        g1 = abs((int)im1[(j + 1) * w + i + 1] - (int)im1[j * w + i]);
-        g2 = abs((int)im1[(j + 1) * w + i] - (int)im1[j * w + i + 1]);
+        int64_t g1;
+        int64_t g2;
+        int64_t gx;
+        int64_t gy;
+        g1 = llabs((int64_t)im1[(j + 1) * w + i + 1] - (int64_t)im1[j * w + i]);
+        g2 = llabs((int64_t)im1[(j + 1) * w + i] - (int64_t)im1[j * w + i + 1]);
         gx = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2);
-        g1 = abs((int)im2[(j + 1) * w + i + 1] - (int)im2[j * w + i]);
-        g2 = abs((int)im2[(j + 1) * w + i] - (int)im2[j * w + i + 1]);
-        gy = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2);
-        gx_buf[(j & 7) * stride + i + 4] = gx;
-        gy_buf[(j & 7) * stride + i + 4] = gy;
+        g1 = llabs((int64_t)im2[(j + 1) * w + i + 1] - (int64_t)im2[j * w + i]);
+        g2 = llabs((int64_t)im2[(j + 1) * w + i] - (int64_t)im2[j * w + i + 1]);
+        gy = ((int64_t)4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2));
+        gx_buf[(j & 7) * stride + i + 4] = (uint32_t)gx;
+        gy_buf[(j & 7) * stride + i + 4] = (uint32_t)gy;
       }
     } else {
       memset(gx_buf + (j & 7) * stride, 0, stride * sizeof(*gx_buf));