| /* |
| * Copyright © 2011 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| * |
| * Authors: |
| * Chris Wilson <chris@chris-wilson.co.uk> |
| * |
| */ |
| |
| #include <unistd.h> |
| #include <stdlib.h> |
| #include <stdint.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <fcntl.h> |
| #include <inttypes.h> |
| #include <errno.h> |
| #include <sys/stat.h> |
| #include <sys/poll.h> |
| #include <sys/ioctl.h> |
| #include <sys/time.h> |
| #include <time.h> |
| |
| #include "drm.h" |
| #include "ioctl_wrappers.h" |
| #include "drmtest.h" |
| #include "intel_chipset.h" |
| #include "intel_reg.h" |
| #include "igt_stats.h" |
| #include "i915/gem_create.h" |
| #include "i915/gem_mman.h" |
| |
| #define ENGINE_FLAGS (I915_EXEC_RING_MASK | I915_EXEC_BSD_MASK) |
| |
| #define WRITE 0x1 |
| #define IDLE 0x2 |
| #define DMABUF 0x4 |
| #define WAIT 0x8 |
| #define SYNC 0x10 |
| #define SYNCOBJ 0x20 |
| |
| struct local_gem_exec_fence { |
| uint32_t handle; |
| uint32_t flags; |
| }; |
| |
| static void gem_busy(int fd, uint32_t handle) |
| { |
| struct drm_i915_gem_busy busy = { .handle = handle }; |
| ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy); |
| } |
| |
| static void gem_wait__busy(int fd, uint32_t handle) |
| { |
| struct drm_i915_gem_wait wait = { .bo_handle = handle }; |
| ioctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait); |
| } |
| |
| static double elapsed(const struct timespec *start, |
| const struct timespec *end) |
| { |
| return 1e9*(end->tv_sec - start->tv_sec) + |
| (end->tv_nsec - start->tv_nsec); |
| } |
| |
| struct sync_merge_data { |
| char name[32]; |
| __s32 fd2; |
| __s32 fence; |
| __u32 flags; |
| __u32 pad; |
| }; |
| |
| #define SYNC_IOC_MAGIC '>' |
| #define SYNC_IOC_MERGE _IOWR(SYNC_IOC_MAGIC, 3, struct sync_merge_data) |
| |
| static int sync_merge(int fd1, int fd2) |
| { |
| struct sync_merge_data data; |
| |
| if (fd1 == -1) |
| return dup(fd2); |
| |
| if (fd2 == -1) |
| return dup(fd1); |
| |
| memset(&data, 0, sizeof(data)); |
| data.fd2 = fd2; |
| strcpy(data.name, "i965"); |
| |
| if (ioctl(fd1, SYNC_IOC_MERGE, &data)) |
| return -errno; |
| |
| return data.fence; |
| } |
| |
| static uint32_t __syncobj_create(int fd) |
| { |
| struct drm_syncobj_create arg; |
| |
| memset(&arg, 0, sizeof(arg)); |
| ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &arg); |
| |
| return arg.handle; |
| } |
| |
| static uint32_t syncobj_create(int fd) |
| { |
| uint32_t ret; |
| |
| igt_assert_neq((ret = __syncobj_create(fd)), 0); |
| |
| return ret; |
| } |
| |
| static int __syncobj_wait(int fd, struct drm_syncobj_wait *args) |
| { |
| int err = 0; |
| if (drmIoctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, args)) |
| err = -errno; |
| return err; |
| } |
| |
| static int loop(unsigned ring, int reps, int ncpus, unsigned flags) |
| { |
| struct drm_i915_gem_execbuffer2 execbuf; |
| struct drm_i915_gem_exec_object2 obj[2]; |
| struct drm_i915_gem_relocation_entry reloc[2]; |
| struct local_gem_exec_fence syncobj; |
| unsigned engines[16]; |
| unsigned nengine; |
| uint32_t *batch; |
| double *shared; |
| int fd, i, gen; |
| int dmabuf; |
| |
| shared = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); |
| |
| fd = drm_open_driver(DRIVER_INTEL); |
| gen = intel_gen(intel_get_drm_devid(fd)); |
| |
| memset(obj, 0, sizeof(obj)); |
| obj[0].handle = gem_create(fd, 4096); |
| if (flags & WRITE) |
| obj[0].flags = EXEC_OBJECT_WRITE; |
| obj[1].handle = gem_create(fd, 4096); |
| batch = gem_mmap__device_coherent(fd, obj[1].handle, 0, 4096, PROT_WRITE); |
| gem_set_domain(fd, obj[1].handle, |
| I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); |
| batch[0] = MI_BATCH_BUFFER_END; |
| |
| memset(&execbuf, 0, sizeof(execbuf)); |
| execbuf.buffers_ptr = to_user_pointer(obj); |
| execbuf.buffer_count = 2; |
| execbuf.flags |= I915_EXEC_HANDLE_LUT; |
| execbuf.flags |= I915_EXEC_NO_RELOC; |
| if (__gem_execbuf(fd, &execbuf)) { |
| execbuf.flags = 0; |
| if (__gem_execbuf(fd, &execbuf)) |
| return 77; |
| } |
| |
| if (flags & SYNCOBJ) { |
| syncobj.handle = syncobj_create(fd); |
| syncobj.flags = I915_EXEC_FENCE_SIGNAL; |
| |
| execbuf.cliprects_ptr = to_user_pointer(&syncobj); |
| execbuf.num_cliprects = 1; |
| execbuf.flags |= I915_EXEC_FENCE_ARRAY; |
| } |
| |
| if (ring == -1) { |
| nengine = 0; |
| for (ring = 1; ring < 16; ring++) { |
| execbuf.flags &= ~ENGINE_FLAGS; |
| execbuf.flags |= ring; |
| if (__gem_execbuf(fd, &execbuf) == 0) |
| engines[nengine++] = ring; |
| } |
| } else { |
| nengine = 1; |
| engines[0] = ring; |
| } |
| |
| obj[1].relocs_ptr = to_user_pointer(reloc); |
| obj[1].relocation_count = 2; |
| |
| if (flags & DMABUF) |
| dmabuf = prime_handle_to_fd(fd, obj[0].handle); |
| |
| gem_set_domain(fd, obj[1].handle, |
| I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); |
| |
| reloc[0].target_handle = obj[1].handle; /* recurse */ |
| reloc[0].presumed_offset = obj[1].offset; |
| reloc[0].offset = sizeof(uint32_t); |
| reloc[0].delta = 0; |
| if (gen < 4) |
| reloc[0].delta = 1; |
| reloc[0].read_domains = I915_GEM_DOMAIN_COMMAND; |
| reloc[0].write_domain = 0; |
| |
| reloc[1].target_handle = obj[0].handle; |
| reloc[1].presumed_offset = obj[0].offset; |
| reloc[1].offset = 1024; |
| reloc[1].delta = 0; |
| reloc[1].read_domains = I915_GEM_DOMAIN_RENDER; |
| reloc[1].write_domain = 0; |
| if (flags & WRITE) |
| reloc[1].write_domain = I915_GEM_DOMAIN_RENDER; |
| |
| while (reps--) { |
| int fence = -1; |
| memset(shared, 0, 4096); |
| |
| gem_set_domain(fd, obj[1].handle, |
| I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); |
| sleep(1); /* wait for the hw to go back to sleep */ |
| batch[i = 0] = MI_BATCH_BUFFER_START; |
| if (gen >= 8) { |
| batch[i] |= 1 << 8 | 1; |
| batch[++i] = obj[1].offset; |
| batch[++i] = obj[1].offset >> 32; |
| } else if (gen >= 6) { |
| batch[i] |= 1 << 8; |
| batch[++i] = obj[1].offset; |
| } else { |
| batch[i] |= 2 << 6; |
| batch[++i] = obj[1].offset; |
| if (gen < 4) |
| batch[i] |= 1; |
| } |
| |
| if ((flags & IDLE) == 0) { |
| for (int n = 0; n < nengine; n++) { |
| execbuf.flags &= ~(3 << 16); |
| if (flags & SYNC) |
| execbuf.flags |= 1 << 17; |
| execbuf.flags &= ~ENGINE_FLAGS; |
| execbuf.flags |= engines[n]; |
| gem_execbuf_wr(fd, &execbuf); |
| if (execbuf.flags & (1 << 17)) |
| fence = sync_merge(fence, execbuf.rsvd2 >> 32); |
| } |
| } |
| |
| igt_fork(child, ncpus) { |
| struct timespec start, end; |
| unsigned count = 0; |
| |
| clock_gettime(CLOCK_MONOTONIC, &start); |
| do { |
| if (flags & DMABUF) { |
| struct pollfd pfd = { .fd = dmabuf, .events = POLLOUT }; |
| for (int inner = 0; inner < 1024; inner++) |
| poll(&pfd, 1, 0); |
| } else if (flags & SYNCOBJ) { |
| struct drm_syncobj_wait arg = { |
| .handles = to_user_pointer(&syncobj.handle), |
| .count_handles = 1, |
| }; |
| |
| for (int inner = 0; inner < 1024; inner++) |
| __syncobj_wait(fd, &arg); |
| } else if (flags & SYNC) { |
| struct pollfd pfd = { .fd = fence, .events = POLLOUT }; |
| for (int inner = 0; inner < 1024; inner++) |
| poll(&pfd, 1, 0); |
| } else if (flags & WAIT) { |
| for (int inner = 0; inner < 1024; inner++) |
| gem_wait__busy(fd, obj[0].handle); |
| } else { |
| for (int inner = 0; inner < 1024; inner++) |
| gem_busy(fd, obj[0].handle); |
| } |
| |
| clock_gettime(CLOCK_MONOTONIC, &end); |
| count += 1024; |
| } while (elapsed(&start, &end) < 2e9); |
| |
| clock_gettime(CLOCK_MONOTONIC, &end); |
| shared[child] = elapsed(&start, &end) / count; |
| } |
| igt_waitchildren(); |
| |
| batch[0] = MI_BATCH_BUFFER_END; |
| if (fence != -1) |
| close(fence); |
| |
| for (int child = 0; child < ncpus; child++) |
| shared[ncpus] += shared[child]; |
| printf("%7.3f\n", shared[ncpus] / ncpus); |
| } |
| return 0; |
| } |
| |
| int main(int argc, char **argv) |
| { |
| unsigned ring = I915_EXEC_RENDER; |
| unsigned flags = 0; |
| int reps = 1; |
| int ncpus = 1; |
| int c; |
| |
| while ((c = getopt (argc, argv, "e:r:dfsSwWI")) != -1) { |
| switch (c) { |
| case 'e': |
| if (strcmp(optarg, "rcs") == 0) |
| ring = I915_EXEC_RENDER; |
| else if (strcmp(optarg, "vcs") == 0) |
| ring = I915_EXEC_BSD; |
| else if (strcmp(optarg, "bcs") == 0) |
| ring = I915_EXEC_BLT; |
| else if (strcmp(optarg, "vecs") == 0) |
| ring = I915_EXEC_VEBOX; |
| else if (strcmp(optarg, "all") == 0) |
| ring = -1; |
| else |
| ring = atoi(optarg); |
| break; |
| |
| case 'r': |
| reps = atoi(optarg); |
| if (reps < 1) |
| reps = 1; |
| break; |
| |
| case 'f': |
| ncpus = sysconf(_SC_NPROCESSORS_ONLN); |
| break; |
| |
| case 'd': |
| flags |= DMABUF; |
| break; |
| |
| case 'w': |
| flags |= WAIT; |
| break; |
| |
| case 's': |
| flags |= SYNC; |
| break; |
| |
| case 'S': |
| flags |= SYNCOBJ; |
| break; |
| |
| case 'W': |
| flags |= WRITE; |
| break; |
| |
| case 'I': |
| flags |= IDLE; |
| break; |
| default: |
| break; |
| } |
| } |
| |
| return loop(ring, reps, ncpus, flags); |
| } |