| // Copyright 2014 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include <assert.h> |
| #include <math.h> |
| #include <stdint.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/time.h> |
| #include <unistd.h> |
| |
| #include <ppapi/c/ppb_input_event.h> |
| #include <ppapi/cpp/fullscreen.h> |
| #include <ppapi/cpp/input_event.h> |
| #include <ppapi/cpp/instance_handle.h> |
| #include <ppapi/cpp/var.h> |
| #include <ppapi/cpp/var_array.h> |
| #include <ppapi/cpp/var_array_buffer.h> |
| #include <ppapi/cpp/var_dictionary.h> |
| |
| #include "ppapi_simple/ps.h" |
| #include "ppapi_simple/ps_context_2d.h" |
| #include "ppapi_simple/ps_event.h" |
| #include "ppapi_simple/ps_instance.h" |
| #include "ppapi_simple/ps_interface.h" |
| #include "sdk_util/macros.h" |
| #include "sdk_util/thread_pool.h" |
| |
| using namespace sdk_util; // For sdk_util::ThreadPool |
| |
| namespace { |
| |
| #define INLINE inline __attribute__((always_inline)) |
| |
| // BGRA helper macro, for constructing a pixel for a BGRA buffer. |
| #define MakeBGRA(b, g, r, a) \ |
| (((a) << 24) | ((r) << 16) | ((g) << 8) | (b)) |
| |
| const int kFramesToBenchmark = 100; |
| const int kCellAlignment = 0x10; |
| |
| // 128 bit vector types |
| typedef uint8_t u8x16_t __attribute__ ((vector_size (16))); |
| |
| // Helper function to broadcast x across 16 element vector. |
| INLINE u8x16_t broadcast(uint8_t x) { |
| u8x16_t r = {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x}; |
| return r; |
| } |
| |
| // Convert a count value into a live (green) or dead color value. |
| const uint32_t kNeighborColors[] = { |
| MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| MakeBGRA(0x00, 0xFF, 0x00, 0xFF), |
| MakeBGRA(0x00, 0xFF, 0x00, 0xFF), |
| MakeBGRA(0x00, 0xFF, 0x00, 0xFF), |
| MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| }; |
| |
| // These represent the new health value of a cell based on its neighboring |
| // values. The health is binary: either alive or dead. |
| const uint8_t kIsAlive[] = { |
| 0, 0, 0, 0, 0, 1, 1, 1, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0 |
| }; |
| |
| // Timer helper for benchmarking. Returns seconds elapsed since program start, |
| // as a double. |
| timeval start_tv; |
| int start_tv_retv = gettimeofday(&start_tv, NULL); |
| |
| inline double getseconds() { |
| const double usec_to_sec = 0.000001; |
| timeval tv; |
| if ((0 == start_tv_retv) && (0 == gettimeofday(&tv, NULL))) |
| return (tv.tv_sec - start_tv.tv_sec) + tv.tv_usec * usec_to_sec; |
| return 0.0; |
| } |
| } // namespace |
| |
| |
| class Life { |
| public: |
| Life(); |
| virtual ~Life(); |
| // Runs a tick of the simulations, update 2D output. |
| void Update(); |
| // Handle event from user, or message from JS. |
| void HandleEvent(PSEvent* ps_event); |
| private: |
| void UpdateContext(); |
| void DrawCell(int32_t x, int32_t y); |
| void ProcessTouchEvent(const pp::TouchInputEvent& touches); |
| void PostUpdateMessage(const char* message, double value); |
| void StartBenchmark(); |
| void EndBenchmark(); |
| void Stir(); |
| void wSimulate(int y); |
| static void wSimulateEntry(int y, void* data); |
| void Simulate(); |
| |
| bool simd_; |
| bool multithread_; |
| bool benchmarking_; |
| int benchmark_frame_counter_; |
| double bench_start_time_; |
| double bench_end_time_; |
| uint8_t* cell_in_; |
| uint8_t* cell_out_; |
| int32_t cell_stride_; |
| int32_t width_; |
| int32_t height_; |
| PSContext2D_t* ps_context_; |
| ThreadPool* workers_; |
| }; |
| |
| Life::Life() : |
| simd_(true), |
| multithread_(true), |
| benchmarking_(false), |
| benchmark_frame_counter_(0), |
| bench_start_time_(0.0), |
| bench_end_time_(0.0), |
| cell_in_(NULL), |
| cell_out_(NULL), |
| cell_stride_(0), |
| width_(0), |
| height_(0) { |
| ps_context_ = PSContext2DAllocate(PP_IMAGEDATAFORMAT_BGRA_PREMUL); |
| // Query system for number of processors via sysconf() |
| int num_threads = sysconf(_SC_NPROCESSORS_ONLN); |
| if (num_threads < 2) |
| num_threads = 2; |
| workers_ = new ThreadPool(num_threads); |
| PSEventSetFilter(PSE_ALL); |
| } |
| |
| Life::~Life() { |
| delete workers_; |
| PSContext2DFree(ps_context_); |
| } |
| |
| void Life::UpdateContext() { |
| cell_stride_ = (ps_context_->width + kCellAlignment - 1) & |
| ~(kCellAlignment - 1); |
| size_t size = cell_stride_ * ps_context_->height; |
| |
| if (ps_context_->width != width_ || ps_context_->height != height_) { |
| free(cell_in_); |
| free(cell_out_); |
| |
| // Create a new context |
| void* in_buffer = NULL; |
| void* out_buffer = NULL; |
| // alloc buffers aligned on 16 bytes |
| posix_memalign(&in_buffer, kCellAlignment, size); |
| posix_memalign(&out_buffer, kCellAlignment, size); |
| cell_in_ = (uint8_t*) in_buffer; |
| cell_out_ = (uint8_t*) out_buffer; |
| |
| memset(cell_out_, 0, size); |
| for (size_t index = 0; index < size; index++) { |
| cell_in_[index] = rand() & 1; |
| } |
| width_ = ps_context_->width; |
| height_ = ps_context_->height; |
| } |
| } |
| |
| void Life::DrawCell(int32_t x, int32_t y) { |
| if (!cell_in_) return; |
| if (x > 0 && x < ps_context_->width - 1 && |
| y > 0 && y < ps_context_->height - 1) { |
| cell_in_[x - 1 + y * cell_stride_] = 1; |
| cell_in_[x + 1 + y * cell_stride_] = 1; |
| cell_in_[x + (y - 1) * cell_stride_] = 1; |
| cell_in_[x + (y + 1) * cell_stride_] = 1; |
| } |
| } |
| |
| void Life::ProcessTouchEvent(const pp::TouchInputEvent& touches) { |
| uint32_t count = touches.GetTouchCount(PP_TOUCHLIST_TYPE_TOUCHES); |
| uint32_t i, j; |
| for (i = 0; i < count; i++) { |
| pp::TouchPoint touch = |
| touches.GetTouchByIndex(PP_TOUCHLIST_TYPE_TOUCHES, i); |
| int radius = (int)(touch.radii().x()); |
| int x = (int)(touch.position().x()); |
| int y = (int)(touch.position().y()); |
| // num = 1/100th the area of touch point |
| uint32_t num = (uint32_t)(M_PI * radius * radius / 100.0f); |
| for (j = 0; j < num; j++) { |
| int dx = rand() % (radius * 2) - radius; |
| int dy = rand() % (radius * 2) - radius; |
| // only plot random cells within the touch area |
| if (dx * dx + dy * dy <= radius * radius) |
| DrawCell(x + dx, y + dy); |
| } |
| } |
| } |
| |
| void Life::PostUpdateMessage(const char* message_name, double value) { |
| pp::VarDictionary message; |
| message.Set("message", message_name); |
| message.Set("value", value); |
| PSInterfaceMessaging()->PostMessage(PSGetInstanceId(), message.pp_var()); |
| } |
| |
| void Life::StartBenchmark() { |
| printf("Running benchmark... (SIMD: %s, multi-threading: %s, size: %dx%d)\n", |
| simd_ ? "enabled" : "disabled", |
| multithread_ ? "enabled" : "disabled", |
| ps_context_->width, |
| ps_context_->height); |
| benchmarking_ = true; |
| bench_start_time_ = getseconds(); |
| benchmark_frame_counter_ = kFramesToBenchmark; |
| } |
| |
| void Life::EndBenchmark() { |
| double total_time; |
| bench_end_time_ = getseconds(); |
| benchmarking_ = false; |
| total_time = bench_end_time_ - bench_start_time_; |
| printf("Finished - benchmark took %f seconds\n", total_time); |
| // Send benchmark result to JS. |
| PostUpdateMessage("benchmark_result", total_time); |
| } |
| |
| void Life::HandleEvent(PSEvent* ps_event) { |
| // Give the 2D context a chance to process the event. |
| if (0 != PSContext2DHandleEvent(ps_context_, ps_event)) { |
| UpdateContext(); |
| return; |
| } |
| |
| switch(ps_event->type) { |
| |
| case PSE_INSTANCE_HANDLEINPUT: { |
| pp::InputEvent event(ps_event->as_resource); |
| |
| switch(event.GetType()) { |
| case PP_INPUTEVENT_TYPE_MOUSEDOWN: |
| case PP_INPUTEVENT_TYPE_MOUSEMOVE: { |
| pp::MouseInputEvent mouse = pp::MouseInputEvent(event); |
| // If the button is down, draw |
| if (mouse.GetModifiers() & PP_INPUTEVENT_MODIFIER_LEFTBUTTONDOWN) { |
| PP_Point location = mouse.GetPosition(); |
| DrawCell(location.x, location.y); |
| } |
| break; |
| } |
| |
| case PP_INPUTEVENT_TYPE_TOUCHSTART: |
| case PP_INPUTEVENT_TYPE_TOUCHMOVE: { |
| pp::TouchInputEvent touches = pp::TouchInputEvent(event); |
| ProcessTouchEvent(touches); |
| break; |
| } |
| |
| case PP_INPUTEVENT_TYPE_KEYDOWN: { |
| pp::Fullscreen fullscreen((pp::InstanceHandle(PSGetInstanceId()))); |
| bool isFullscreen = fullscreen.IsFullscreen(); |
| fullscreen.SetFullscreen(!isFullscreen); |
| break; |
| } |
| |
| default: |
| break; |
| } |
| break; // case PSE_INSTANCE_HANDLEINPUT |
| } |
| |
| case PSE_INSTANCE_HANDLEMESSAGE: { |
| // Convert Pepper Simple message to PPAPI C++ vars |
| pp::Var var(ps_event->as_var); |
| if (var.is_dictionary()) { |
| pp::VarDictionary dictionary(var); |
| std::string message = dictionary.Get("message").AsString(); |
| if (message == "run_benchmark" && !benchmarking_) { |
| StartBenchmark(); |
| } else if (message == "set_simd") { |
| simd_ = dictionary.Get("value").AsBool(); |
| } else if (message == "set_threading") { |
| multithread_ = dictionary.Get("value").AsBool(); |
| } |
| } |
| break; // case PSE_INSTANCE_HANDLEMESSAGE |
| } |
| |
| default: |
| break; |
| } |
| } |
| |
| void Life::Stir() { |
| int32_t width = ps_context_->width; |
| int32_t height = ps_context_->height; |
| int32_t stride = cell_stride_; |
| int32_t i; |
| if (cell_in_ == NULL || cell_out_ == NULL) |
| return; |
| |
| for (i = 0; i < width; ++i) { |
| cell_in_[i] = rand() & 1; |
| cell_in_[i + (height - 1) * stride] = rand() & 1; |
| } |
| for (i = 0; i < height; ++i) { |
| cell_in_[i * stride] = rand() & 1; |
| cell_in_[i * stride + (width - 1)] = rand() & 1; |
| } |
| } |
| |
| void Life::wSimulate(int y) { |
| // Don't run simulation on top and bottom borders |
| if (y < 1 || y >= ps_context_->height - 1) |
| return; |
| |
| // Do neighbor summation; apply rules, output pixel color. Note that a 1 cell |
| // wide perimeter is excluded from the simulation update; only cells from |
| // x = 1 to x < width - 1 and y = 1 to y < height - 1 are updated. |
| uint8_t *src0 = (cell_in_ + (y - 1) * cell_stride_); |
| uint8_t *src1 = src0 + cell_stride_; |
| uint8_t *src2 = src1 + cell_stride_; |
| uint8_t *dst = (cell_out_ + y * cell_stride_) + 1; |
| uint32_t *pixels = static_cast<uint32_t *>(ps_context_->data); |
| uint32_t *pixel_line = // static_cast<uint32_t*> |
| (pixels + y * ps_context_->stride / sizeof(uint32_t)); |
| int32_t x = 1; |
| |
| if (simd_) { |
| const u8x16_t kOne = broadcast(1); |
| const u8x16_t kFour = broadcast(4); |
| const u8x16_t kEight = broadcast(8); |
| const u8x16_t kZero255 = {0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; |
| |
| // Prime the src |
| u8x16_t src00 = *reinterpret_cast<u8x16_t*>(&src0[0]); |
| u8x16_t src01 = *reinterpret_cast<u8x16_t*>(&src0[16]); |
| u8x16_t src10 = *reinterpret_cast<u8x16_t*>(&src1[0]); |
| u8x16_t src11 = *reinterpret_cast<u8x16_t*>(&src1[16]); |
| u8x16_t src20 = *reinterpret_cast<u8x16_t*>(&src2[0]); |
| u8x16_t src21 = *reinterpret_cast<u8x16_t*>(&src2[16]); |
| |
| // This inner loop is SIMD - each loop iteration will process 16 cells. |
| for (; (x + 15) < (ps_context_->width - 1); x += 16) { |
| |
| // Construct jittered source temps, using __builtin_shufflevector(..) to |
| // extract a shifted 16 element vector from the 32 element concatenation |
| // of two source vectors. |
| u8x16_t src0j0 = src00; |
| u8x16_t src0j1 = __builtin_shufflevector(src00, src01, |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
| u8x16_t src0j2 = __builtin_shufflevector(src00, src01, |
| 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); |
| u8x16_t src1j0 = src10; |
| u8x16_t src1j1 = __builtin_shufflevector(src10, src11, |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
| u8x16_t src1j2 = __builtin_shufflevector(src10, src11, |
| 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); |
| u8x16_t src2j0 = src20; |
| u8x16_t src2j1 = __builtin_shufflevector(src20, src21, |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
| u8x16_t src2j2 = __builtin_shufflevector(src20, src21, |
| 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); |
| |
| // Sum the jittered sources to construct neighbor count. |
| u8x16_t count = src0j0 + src0j1 + src0j2 + |
| src1j0 + + src1j2 + |
| src2j0 + src2j1 + src2j2; |
| // Add the center cell. |
| count = count + count + src1j1; |
| // If count > 4 and < 8, center cell will be alive in the next frame. |
| u8x16_t alive1 = count > kFour; |
| u8x16_t alive2 = count < kEight; |
| // Intersect the two comparisons from above. |
| u8x16_t alive = alive1 & alive2; |
| |
| // At this point, alive[x] will be one of two values: |
| // 0x00 for a dead cell |
| // 0xFF for an alive cell. |
| // |
| // Next, convert alive cells to green pixel color. |
| // Use __builtin_shufflevector(..) to construct output pixels from |
| // concantination of alive vector and kZero255 const vector. |
| // Indices 0..15 select the 16 cells from alive vector. |
| // Index 16 is zero constant from kZero255 constant vector. |
| // Index 17 is 255 constant from kZero255 constant vector. |
| // Output pixel color values are in BGRABGRABGRABGRA order. |
| // Since each pixel needs 4 bytes of color information, 16 cells will |
| // need to expand to 4 separate 16 byte pixel splats. |
| u8x16_t pixel0_3 = __builtin_shufflevector(alive, kZero255, |
| 16, 0, 16, 17, 16, 1, 16, 17, 16, 2, 16, 17, 16, 3, 16, 17); |
| u8x16_t pixel4_7 = __builtin_shufflevector(alive, kZero255, |
| 16, 4, 16, 17, 16, 5, 16, 17, 16, 6, 16, 17, 16, 7, 16, 17); |
| u8x16_t pixel8_11 = __builtin_shufflevector(alive, kZero255, |
| 16, 8, 16, 17, 16, 9, 16, 17, 16, 10, 16, 17, 16, 11, 16, 17); |
| u8x16_t pixel12_15 = __builtin_shufflevector(alive, kZero255, |
| 16, 12, 16, 17, 16, 13, 16, 17, 16, 14, 16, 17, 16, 15, 16, 17); |
| |
| // Write 16 pixels to output pixel buffer. |
| *reinterpret_cast<u8x16_t*>(pixel_line + 0) = pixel0_3; |
| *reinterpret_cast<u8x16_t*>(pixel_line + 4) = pixel4_7; |
| *reinterpret_cast<u8x16_t*>(pixel_line + 8) = pixel8_11; |
| *reinterpret_cast<u8x16_t*>(pixel_line + 12) = pixel12_15; |
| |
| // Convert alive mask to 1 or 0 and store in destination cell array. |
| *reinterpret_cast<u8x16_t*>(dst) = alive & kOne; |
| |
| // Increment pointers. |
| pixel_line += 16; |
| dst += 16; |
| src0 += 16; |
| src1 += 16; |
| src2 += 16; |
| |
| // Shift source over by 16 cells and read the next 16 cells. |
| src00 = src01; |
| src01 = *reinterpret_cast<u8x16_t*>(&src0[16]); |
| src10 = src11; |
| src11 = *reinterpret_cast<u8x16_t*>(&src1[16]); |
| src20 = src21; |
| src21 = *reinterpret_cast<u8x16_t*>(&src2[16]); |
| } |
| } |
| |
| // The SIMD loop above does 16 cells at a time. The loop below is the |
| // regular version which processes one cell at a time. It is used to |
| // finish the remainder of the scanline not handled by the SIMD loop. |
| for (; x < (ps_context_->width - 1); ++x) { |
| // Sum the jittered sources to construct neighbor count. |
| int count = src0[0] + src0[1] + src0[2] + |
| src1[0] + + src1[2] + |
| src2[0] + src2[1] + src2[2]; |
| // Add the center cell. |
| count = count + count + src1[1]; |
| // Use table lookup indexed by count to determine pixel & alive state. |
| uint32_t color = kNeighborColors[count]; |
| *pixel_line++ = color; |
| *dst++ = kIsAlive[count]; |
| ++src0; |
| ++src1; |
| ++src2; |
| } |
| } |
| |
| // Static entry point for worker thread. |
| void Life::wSimulateEntry(int slice, void* thiz) { |
| static_cast<Life*>(thiz)->wSimulate(slice); |
| } |
| |
| void Life::Simulate() { |
| // Stir up the edges to prevent the simulation from reaching steady state. |
| Stir(); |
| |
| if (multithread_) { |
| // If multi-threading enabled, dispatch tasks to pool of worker threads. |
| workers_->Dispatch(ps_context_->height, wSimulateEntry, this); |
| } else { |
| // Else manually simulate each line on this thread. |
| for (int y = 0; y < ps_context_->height; y++) { |
| wSimulateEntry(y, this); |
| } |
| } |
| std::swap(cell_in_, cell_out_); |
| } |
| |
| void Life::Update() { |
| |
| PSContext2DGetBuffer(ps_context_); |
| if (NULL == ps_context_->data) |
| return; |
| |
| // If we somehow have not allocated these pointers yet, skip this frame. |
| if (!cell_in_ || !cell_out_) return; |
| |
| // Simulate one (or more if benchmarking) frames |
| do { |
| Simulate(); |
| if (!benchmarking_) |
| break; |
| --benchmark_frame_counter_; |
| } while(benchmark_frame_counter_ > 0); |
| if (benchmarking_) |
| EndBenchmark(); |
| |
| PSContext2DSwapBuffer(ps_context_); |
| } |
| |
| // Starting point for the module. We do not use main since it would |
| // collide with main in libppapi_cpp. |
| int main(int argc, char* argv[]) { |
| Life life; |
| while (true) { |
| PSEvent* ps_event; |
| // Consume all available events |
| while ((ps_event = PSEventTryAcquire()) != NULL) { |
| life.HandleEvent(ps_event); |
| PSEventRelease(ps_event); |
| } |
| // Do simulation, render and present. |
| life.Update(); |
| } |
| return 0; |
| } |