| // Copyright 2018 The gVisor Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // Package shm implements sysv shared memory segments. |
| // |
| // Known missing features: |
| // |
| // - SHM_LOCK/SHM_UNLOCK are no-ops. The sentry currently doesn't implement |
| // memory locking in general. |
| // |
| // - SHM_HUGETLB and related flags for shmget(2) are ignored. There's no easy |
| // way to implement hugetlb support on a per-map basis, and it has no impact |
| // on correctness. |
| // |
| // - SHM_NORESERVE for shmget(2) is ignored, the sentry doesn't implement swap |
| // so it's meaningless to reserve space for swap. |
| // |
| // - No per-process segment size enforcement. This feature probably isn't used |
| // much anyways, since Linux sets the per-process limits to the system-wide |
| // limits by default. |
| // |
| // Lock ordering: mm.mappingMu -> shm registry lock -> shm lock |
| package shm |
| |
| import ( |
| "fmt" |
| |
| "gvisor.dev/gvisor/pkg/abi/linux" |
| "gvisor.dev/gvisor/pkg/context" |
| "gvisor.dev/gvisor/pkg/log" |
| "gvisor.dev/gvisor/pkg/sentry/fs" |
| "gvisor.dev/gvisor/pkg/sentry/kernel/auth" |
| ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" |
| "gvisor.dev/gvisor/pkg/sentry/memmap" |
| "gvisor.dev/gvisor/pkg/sentry/pgalloc" |
| "gvisor.dev/gvisor/pkg/sentry/usage" |
| "gvisor.dev/gvisor/pkg/sync" |
| "gvisor.dev/gvisor/pkg/syserror" |
| "gvisor.dev/gvisor/pkg/usermem" |
| ) |
| |
type (
	// Key is the public key naming a shm segment. It plays the role a file
	// name plays for a file.
	Key int32

	// ID is the opaque handle identifying a shm segment. It plays the role a
	// file descriptor plays for an open file.
	ID int32
)
| |
| // Registry tracks all shared memory segments in an IPC namespace. The registry |
| // provides the mechanisms for creating and finding segments, and reporting |
| // global shm parameters. |
| // |
| // +stateify savable |
| type Registry struct { |
| // userNS owns the IPC namespace this registry belong to. Immutable. |
| userNS *auth.UserNamespace |
| |
| // mu protects all fields below. |
| mu sync.Mutex `state:"nosave"` |
| |
| // shms maps segment ids to segments. |
| // |
| // shms holds all referenced segments, which are removed on the last |
| // DecRef. Thus, it cannot itself hold a reference on the Shm. |
| // |
| // Since removal only occurs after the last (unlocked) DecRef, there |
| // exists a short window during which a Shm still exists in Shm, but is |
| // unreferenced. Users must use TryIncRef to determine if the Shm is |
| // still valid. |
| shms map[ID]*Shm |
| |
| // keysToShms maps segment keys to segments. |
| // |
| // Shms in keysToShms are guaranteed to be referenced, as they are |
| // removed by disassociateKey before the last DecRef. |
| keysToShms map[Key]*Shm |
| |
| // Sum of the sizes of all existing segments rounded up to page size, in |
| // units of page size. |
| totalPages uint64 |
| |
| // ID assigned to the last created segment. Used to quickly find the next |
| // unused ID. |
| lastIDUsed ID |
| } |
| |
| // NewRegistry creates a new shm registry. |
| func NewRegistry(userNS *auth.UserNamespace) *Registry { |
| return &Registry{ |
| userNS: userNS, |
| shms: make(map[ID]*Shm), |
| keysToShms: make(map[Key]*Shm), |
| } |
| } |
| |
| // FindByID looks up a segment given an ID. |
| // |
| // FindByID returns a reference on Shm. |
| func (r *Registry) FindByID(id ID) *Shm { |
| r.mu.Lock() |
| defer r.mu.Unlock() |
| s := r.shms[id] |
| // Take a reference on s. If TryIncRef fails, s has reached the last |
| // DecRef, but hasn't quite been removed from r.shms yet. |
| if s != nil && s.TryIncRef() { |
| return s |
| } |
| return nil |
| } |
| |
| // dissociateKey removes the association between a segment and its key, |
| // preventing it from being discovered in the registry. This doesn't necessarily |
| // mean the segment is about to be destroyed. This is analogous to unlinking a |
| // file; the segment can still be used by a process already referencing it, but |
| // cannot be discovered by a new process. |
| func (r *Registry) dissociateKey(s *Shm) { |
| r.mu.Lock() |
| defer r.mu.Unlock() |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| if s.key != linux.IPC_PRIVATE { |
| delete(r.keysToShms, s.key) |
| s.key = linux.IPC_PRIVATE |
| } |
| } |
| |
| // FindOrCreate looks up or creates a segment in the registry. It's functionally |
| // analogous to open(2). |
| // |
| // FindOrCreate returns a reference on Shm. |
| func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size uint64, mode linux.FileMode, private, create, exclusive bool) (*Shm, error) { |
| if (create || private) && (size < linux.SHMMIN || size > linux.SHMMAX) { |
| // "A new segment was to be created and size is less than SHMMIN or |
| // greater than SHMMAX." - man shmget(2) |
| // |
| // Note that 'private' always implies the creation of a new segment |
| // whether IPC_CREAT is specified or not. |
| return nil, syserror.EINVAL |
| } |
| |
| r.mu.Lock() |
| defer r.mu.Unlock() |
| |
| if len(r.shms) >= linux.SHMMNI { |
| // "All possible shared memory IDs have been taken (SHMMNI) ..." |
| // - man shmget(2) |
| return nil, syserror.ENOSPC |
| } |
| |
| if !private { |
| // Look up an existing segment. |
| if shm := r.keysToShms[key]; shm != nil { |
| shm.mu.Lock() |
| defer shm.mu.Unlock() |
| |
| // Check that caller can access the segment. |
| if !shm.checkPermissions(ctx, fs.PermsFromMode(mode)) { |
| // "The user does not have permission to access the shared |
| // memory segment, and does not have the CAP_IPC_OWNER |
| // capability in the user namespace that governs its IPC |
| // namespace." - man shmget(2) |
| return nil, syserror.EACCES |
| } |
| |
| if size > shm.size { |
| // "A segment for the given key exists, but size is greater than |
| // the size of that segment." - man shmget(2) |
| return nil, syserror.EINVAL |
| } |
| |
| if create && exclusive { |
| // "IPC_CREAT and IPC_EXCL were specified in shmflg, but a |
| // shared memory segment already exists for key." |
| // - man shmget(2) |
| return nil, syserror.EEXIST |
| } |
| |
| shm.IncRef() |
| return shm, nil |
| } |
| |
| if !create { |
| // "No segment exists for the given key, and IPC_CREAT was not |
| // specified." - man shmget(2) |
| return nil, syserror.ENOENT |
| } |
| } |
| |
| var sizeAligned uint64 |
| if val, ok := usermem.Addr(size).RoundUp(); ok { |
| sizeAligned = uint64(val) |
| } else { |
| return nil, syserror.EINVAL |
| } |
| |
| if numPages := sizeAligned / usermem.PageSize; r.totalPages+numPages > linux.SHMALL { |
| // "... allocating a segment of the requested size would cause the |
| // system to exceed the system-wide limit on shared memory (SHMALL)." |
| // - man shmget(2) |
| return nil, syserror.ENOSPC |
| } |
| |
| // Need to create a new segment. |
| creator := fs.FileOwnerFromContext(ctx) |
| perms := fs.FilePermsFromMode(mode) |
| s, err := r.newShm(ctx, pid, key, creator, perms, size) |
| if err != nil { |
| return nil, err |
| } |
| // The initial reference is held by s itself. Take another to return to |
| // the caller. |
| s.IncRef() |
| return s, nil |
| } |
| |
| // newShm creates a new segment in the registry. |
| // |
| // Precondition: Caller must hold r.mu. |
| func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.FileOwner, perms fs.FilePermissions, size uint64) (*Shm, error) { |
| mfp := pgalloc.MemoryFileProviderFromContext(ctx) |
| if mfp == nil { |
| panic(fmt.Sprintf("context.Context %T lacks non-nil value for key %T", ctx, pgalloc.CtxMemoryFileProvider)) |
| } |
| |
| effectiveSize := uint64(usermem.Addr(size).MustRoundUp()) |
| fr, err := mfp.MemoryFile().Allocate(effectiveSize, usage.Anonymous) |
| if err != nil { |
| return nil, err |
| } |
| |
| shm := &Shm{ |
| mfp: mfp, |
| registry: r, |
| creator: creator, |
| size: size, |
| effectiveSize: effectiveSize, |
| fr: fr, |
| key: key, |
| perms: perms, |
| owner: creator, |
| creatorPID: pid, |
| changeTime: ktime.NowFromContext(ctx), |
| } |
| shm.InitRefs() |
| |
| // Find the next available ID. |
| for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ { |
| // Handle wrap around. |
| if id < 0 { |
| id = 0 |
| continue |
| } |
| if r.shms[id] == nil { |
| r.lastIDUsed = id |
| |
| shm.ID = id |
| r.shms[id] = shm |
| r.keysToShms[key] = shm |
| |
| r.totalPages += effectiveSize / usermem.PageSize |
| |
| return shm, nil |
| } |
| } |
| |
| log.Warningf("Shm ids exhuasted, they may be leaking") |
| return nil, syserror.ENOSPC |
| } |
| |
| // IPCInfo reports global parameters for sysv shared memory segments on this |
| // system. See shmctl(IPC_INFO). |
| func (r *Registry) IPCInfo() *linux.ShmParams { |
| return &linux.ShmParams{ |
| ShmMax: linux.SHMMAX, |
| ShmMin: linux.SHMMIN, |
| ShmMni: linux.SHMMNI, |
| ShmSeg: linux.SHMSEG, |
| ShmAll: linux.SHMALL, |
| } |
| } |
| |
| // ShmInfo reports linux-specific global parameters for sysv shared memory |
| // segments on this system. See shmctl(SHM_INFO). |
| func (r *Registry) ShmInfo() *linux.ShmInfo { |
| r.mu.Lock() |
| defer r.mu.Unlock() |
| |
| return &linux.ShmInfo{ |
| UsedIDs: int32(r.lastIDUsed), |
| ShmTot: r.totalPages, |
| ShmRss: r.totalPages, // We could probably get a better estimate from memory accounting. |
| ShmSwp: 0, // No reclaim at the moment. |
| } |
| } |
| |
| // remove deletes a segment from this registry, deaccounting the memory used by |
| // the segment. |
| // |
| // Precondition: Must follow a call to r.dissociateKey(s). |
| func (r *Registry) remove(s *Shm) { |
| r.mu.Lock() |
| defer r.mu.Unlock() |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| |
| if s.key != linux.IPC_PRIVATE { |
| panic(fmt.Sprintf("Attempted to remove %s from the registry whose key is still associated", s.debugLocked())) |
| } |
| |
| delete(r.shms, s.ID) |
| r.totalPages -= s.effectiveSize / usermem.PageSize |
| } |
| |
| // Release drops the self-reference of each active shm segment in the registry. |
| // It is called when the kernel.IPCNamespace containing r is being destroyed. |
| func (r *Registry) Release(ctx context.Context) { |
| // Because Shm.DecRef() may acquire the same locks, collect the segments to |
| // release first. Note that this should not race with any updates to r, since |
| // the IPC namespace containing it has no more references. |
| toRelease := make([]*Shm, 0) |
| r.mu.Lock() |
| for _, s := range r.keysToShms { |
| s.mu.Lock() |
| if !s.pendingDestruction { |
| toRelease = append(toRelease, s) |
| } |
| s.mu.Unlock() |
| } |
| r.mu.Unlock() |
| |
| for _, s := range toRelease { |
| r.dissociateKey(s) |
| s.DecRef(ctx) |
| } |
| } |
| |
| // Shm represents a single shared memory segment. |
| // |
| // Shm segments are backed directly by an allocation from platform memory. |
| // Segments are always mapped as a whole, greatly simplifying how mappings are |
| // tracked. However note that mremap and munmap calls may cause the vma for a |
| // segment to become fragmented; which requires special care when unmapping a |
| // segment. See mm/shm.go. |
| // |
| // Segments persist until they are explicitly marked for destruction via |
| // MarkDestroyed(). |
| // |
| // Shm implements memmap.Mappable and memmap.MappingIdentity. |
| // |
| // +stateify savable |
| type Shm struct { |
| // ShmRefs tracks the number of references to this segment. |
| // |
| // A segment holds a reference to itself until it is marked for |
| // destruction. |
| // |
| // In addition to direct users, the MemoryManager will hold references |
| // via MappingIdentity. |
| ShmRefs |
| |
| mfp pgalloc.MemoryFileProvider |
| |
| // registry points to the shm registry containing this segment. Immutable. |
| registry *Registry |
| |
| // ID is the kernel identifier for this segment. Immutable. |
| ID ID |
| |
| // creator is the user that created the segment. Immutable. |
| creator fs.FileOwner |
| |
| // size is the requested size of the segment at creation, in |
| // bytes. Immutable. |
| size uint64 |
| |
| // effectiveSize of the segment, rounding up to the next page |
| // boundary. Immutable. |
| // |
| // Invariant: effectiveSize must be a multiple of usermem.PageSize. |
| effectiveSize uint64 |
| |
| // fr is the offset into mfp.MemoryFile() that backs this contents of this |
| // segment. Immutable. |
| fr memmap.FileRange |
| |
| // mu protects all fields below. |
| mu sync.Mutex `state:"nosave"` |
| |
| // key is the public identifier for this segment. |
| key Key |
| |
| // perms is the access permissions for the segment. |
| perms fs.FilePermissions |
| |
| // owner of this segment. |
| owner fs.FileOwner |
| // attachTime is updated on every successful shmat. |
| attachTime ktime.Time |
| // detachTime is updated on every successful shmdt. |
| detachTime ktime.Time |
| // changeTime is updated on every successful changes to the segment via |
| // shmctl(IPC_SET). |
| changeTime ktime.Time |
| |
| // creatorPID is the PID of the process that created the segment. |
| creatorPID int32 |
| // lastAttachDetachPID is the pid of the process that issued the last shmat |
| // or shmdt syscall. |
| lastAttachDetachPID int32 |
| |
| // pendingDestruction indicates the segment was marked as destroyed through |
| // shmctl(IPC_RMID). When marked as destroyed, the segment will not be found |
| // in the registry and can no longer be attached. When the last user |
| // detaches from the segment, it is destroyed. |
| pendingDestruction bool |
| } |
| |
| // Precondition: Caller must hold s.mu. |
| func (s *Shm) debugLocked() string { |
| return fmt.Sprintf("Shm{id: %d, key: %d, size: %d bytes, refs: %d, destroyed: %v}", |
| s.ID, s.key, s.size, s.ReadRefs(), s.pendingDestruction) |
| } |
| |
| // MappedName implements memmap.MappingIdentity.MappedName. |
| func (s *Shm) MappedName(ctx context.Context) string { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| return fmt.Sprintf("SYSV%08d", s.key) |
| } |
| |
| // DeviceID implements memmap.MappingIdentity.DeviceID. |
| func (s *Shm) DeviceID() uint64 { |
| return shmDevice.DeviceID() |
| } |
| |
| // InodeID implements memmap.MappingIdentity.InodeID. |
| func (s *Shm) InodeID() uint64 { |
| // "shmid gets reported as "inode#" in /proc/pid/maps. proc-ps tools use |
| // this. Changing this will break them." -- Linux, ipc/shm.c:newseg() |
| return uint64(s.ID) |
| } |
| |
| // DecRef drops a reference on s. |
| // |
| // Precondition: Caller must not hold s.mu. |
| func (s *Shm) DecRef(ctx context.Context) { |
| s.ShmRefs.DecRef(func() { |
| s.mfp.MemoryFile().DecRef(s.fr) |
| s.registry.remove(s) |
| }) |
| } |
| |
| // Msync implements memmap.MappingIdentity.Msync. Msync is a no-op for shm |
| // segments. |
| func (s *Shm) Msync(context.Context, memmap.MappableRange) error { |
| return nil |
| } |
| |
| // AddMapping implements memmap.Mappable.AddMapping. |
| func (s *Shm) AddMapping(ctx context.Context, _ memmap.MappingSpace, _ usermem.AddrRange, _ uint64, _ bool) error { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| s.attachTime = ktime.NowFromContext(ctx) |
| if pid, ok := context.ThreadGroupIDFromContext(ctx); ok { |
| s.lastAttachDetachPID = pid |
| } else { |
| // AddMapping is called during a syscall, so ctx should always be a task |
| // context. |
| log.Warningf("Adding mapping to %s but couldn't get the current pid; not updating the last attach pid", s.debugLocked()) |
| } |
| return nil |
| } |
| |
| // RemoveMapping implements memmap.Mappable.RemoveMapping. |
| func (s *Shm) RemoveMapping(ctx context.Context, _ memmap.MappingSpace, _ usermem.AddrRange, _ uint64, _ bool) { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| // RemoveMapping may be called during task exit, when ctx |
| // is context.Background. Gracefully handle missing clocks. Failing to |
| // update the detach time in these cases is ok, since no one can observe the |
| // omission. |
| if clock := ktime.RealtimeClockFromContext(ctx); clock != nil { |
| s.detachTime = clock.Now() |
| } |
| |
| // If called from a non-task context we also won't have a threadgroup |
| // id. Silently skip updating the lastAttachDetachPid in that case. |
| if pid, ok := context.ThreadGroupIDFromContext(ctx); ok { |
| s.lastAttachDetachPID = pid |
| } else { |
| log.Debugf("Couldn't obtain pid when removing mapping to %s, not updating the last detach pid.", s.debugLocked()) |
| } |
| } |
| |
| // CopyMapping implements memmap.Mappable.CopyMapping. |
| func (*Shm) CopyMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, usermem.AddrRange, uint64, bool) error { |
| return nil |
| } |
| |
| // Translate implements memmap.Mappable.Translate. |
| func (s *Shm) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { |
| var err error |
| if required.End > s.fr.Length() { |
| err = &memmap.BusError{syserror.EFAULT} |
| } |
| if source := optional.Intersect(memmap.MappableRange{0, s.fr.Length()}); source.Length() != 0 { |
| return []memmap.Translation{ |
| { |
| Source: source, |
| File: s.mfp.MemoryFile(), |
| Offset: s.fr.Start + source.Start, |
| Perms: usermem.AnyAccess, |
| }, |
| }, err |
| } |
| return nil, err |
| } |
| |
| // InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. |
| func (s *Shm) InvalidateUnsavable(ctx context.Context) error { |
| return nil |
| } |
| |
// AttachOpts describes various flags passed to shmat(2).
type AttachOpts struct {
	// Execute requests execute permission on the attached mapping.
	Execute bool

	// Readonly requests a mapping without write permission.
	Readonly bool

	// Remap requests that the mapping be placed at a fixed address.
	Remap bool
}
| |
| // ConfigureAttach creates an mmap configuration for the segment with the |
| // requested attach options. |
| // |
| // Postconditions: The returned MMapOpts are valid only as long as a reference |
| // continues to be held on s. |
| func (s *Shm) ConfigureAttach(ctx context.Context, addr usermem.Addr, opts AttachOpts) (memmap.MMapOpts, error) { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| if s.pendingDestruction && s.ReadRefs() == 0 { |
| return memmap.MMapOpts{}, syserror.EIDRM |
| } |
| |
| if !s.checkPermissions(ctx, fs.PermMask{ |
| Read: true, |
| Write: !opts.Readonly, |
| Execute: opts.Execute, |
| }) { |
| // "The calling process does not have the required permissions for the |
| // requested attach type, and does not have the CAP_IPC_OWNER capability |
| // in the user namespace that governs its IPC namespace." - man shmat(2) |
| return memmap.MMapOpts{}, syserror.EACCES |
| } |
| return memmap.MMapOpts{ |
| Length: s.size, |
| Offset: 0, |
| Addr: addr, |
| Fixed: opts.Remap, |
| Perms: usermem.AccessType{ |
| Read: true, |
| Write: !opts.Readonly, |
| Execute: opts.Execute, |
| }, |
| MaxPerms: usermem.AnyAccess, |
| Mappable: s, |
| MappingIdentity: s, |
| }, nil |
| } |
| |
| // EffectiveSize returns the size of the underlying shared memory segment. This |
| // may be larger than the requested size at creation, due to rounding to page |
| // boundaries. |
| func (s *Shm) EffectiveSize() uint64 { |
| return s.effectiveSize |
| } |
| |
| // IPCStat returns information about a shm. See shmctl(IPC_STAT). |
| func (s *Shm) IPCStat(ctx context.Context) (*linux.ShmidDS, error) { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| |
| // "The caller must have read permission on the shared memory segment." |
| // - man shmctl(2) |
| if !s.checkPermissions(ctx, fs.PermMask{Read: true}) { |
| // "IPC_STAT or SHM_STAT is requested and shm_perm.mode does not allow |
| // read access for shmid, and the calling process does not have the |
| // CAP_IPC_OWNER capability in the user namespace that governs its IPC |
| // namespace." - man shmctl(2) |
| return nil, syserror.EACCES |
| } |
| |
| var mode uint16 |
| if s.pendingDestruction { |
| mode |= linux.SHM_DEST |
| } |
| creds := auth.CredentialsFromContext(ctx) |
| |
| // Use the reference count as a rudimentary count of the number of |
| // attaches. We exclude: |
| // |
| // 1. The reference the caller holds. |
| // 2. The self-reference held by s prior to destruction. |
| // |
| // Note that this may still overcount by including transient references |
| // used in concurrent calls. |
| nattach := uint64(s.ReadRefs()) - 1 |
| if !s.pendingDestruction { |
| nattach-- |
| } |
| |
| ds := &linux.ShmidDS{ |
| ShmPerm: linux.IPCPerm{ |
| Key: uint32(s.key), |
| UID: uint32(creds.UserNamespace.MapFromKUID(s.owner.UID)), |
| GID: uint32(creds.UserNamespace.MapFromKGID(s.owner.GID)), |
| CUID: uint32(creds.UserNamespace.MapFromKUID(s.creator.UID)), |
| CGID: uint32(creds.UserNamespace.MapFromKGID(s.creator.GID)), |
| Mode: mode | uint16(s.perms.LinuxMode()), |
| Seq: 0, // IPC sequences not supported. |
| }, |
| ShmSegsz: s.size, |
| ShmAtime: s.attachTime.TimeT(), |
| ShmDtime: s.detachTime.TimeT(), |
| ShmCtime: s.changeTime.TimeT(), |
| ShmCpid: s.creatorPID, |
| ShmLpid: s.lastAttachDetachPID, |
| ShmNattach: nattach, |
| } |
| |
| return ds, nil |
| } |
| |
| // Set modifies attributes for a segment. See shmctl(IPC_SET). |
| func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| |
| if !s.checkOwnership(ctx) { |
| return syserror.EPERM |
| } |
| |
| creds := auth.CredentialsFromContext(ctx) |
| uid := creds.UserNamespace.MapToKUID(auth.UID(ds.ShmPerm.UID)) |
| gid := creds.UserNamespace.MapToKGID(auth.GID(ds.ShmPerm.GID)) |
| if !uid.Ok() || !gid.Ok() { |
| return syserror.EINVAL |
| } |
| |
| // User may only modify the lower 9 bits of the mode. All the other bits are |
| // always 0 for the underlying inode. |
| mode := linux.FileMode(ds.ShmPerm.Mode & 0x1ff) |
| s.perms = fs.FilePermsFromMode(mode) |
| |
| s.owner.UID = uid |
| s.owner.GID = gid |
| |
| s.changeTime = ktime.NowFromContext(ctx) |
| return nil |
| } |
| |
| // MarkDestroyed marks a segment for destruction. The segment is actually |
| // destroyed once it has no references. MarkDestroyed may be called multiple |
| // times, and is safe to call after a segment has already been destroyed. See |
| // shmctl(IPC_RMID). |
| func (s *Shm) MarkDestroyed(ctx context.Context) { |
| s.registry.dissociateKey(s) |
| |
| s.mu.Lock() |
| if s.pendingDestruction { |
| s.mu.Unlock() |
| return |
| } |
| s.pendingDestruction = true |
| s.mu.Unlock() |
| |
| // Drop the self-reference so destruction occurs when all |
| // external references are gone. |
| // |
| // N.B. This cannot be the final DecRef, as the caller also |
| // holds a reference. |
| s.DecRef(ctx) |
| return |
| } |
| |
| // checkOwnership verifies whether a segment may be accessed by ctx as an |
| // owner. See ipc/util.c:ipcctl_pre_down_nolock() in Linux. |
| // |
| // Precondition: Caller must hold s.mu. |
| func (s *Shm) checkOwnership(ctx context.Context) bool { |
| creds := auth.CredentialsFromContext(ctx) |
| if s.owner.UID == creds.EffectiveKUID || s.creator.UID == creds.EffectiveKUID { |
| return true |
| } |
| |
| // Tasks with CAP_SYS_ADMIN may bypass ownership checks. Strangely, Linux |
| // doesn't use CAP_IPC_OWNER for this despite CAP_IPC_OWNER being documented |
| // for use to "override IPC ownership checks". |
| return creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, s.registry.userNS) |
| } |
| |
| // checkPermissions verifies whether a segment is accessible by ctx for access |
| // described by req. See ipc/util.c:ipcperms() in Linux. |
| // |
| // Precondition: Caller must hold s.mu. |
| func (s *Shm) checkPermissions(ctx context.Context, req fs.PermMask) bool { |
| creds := auth.CredentialsFromContext(ctx) |
| |
| p := s.perms.Other |
| if s.owner.UID == creds.EffectiveKUID { |
| p = s.perms.User |
| } else if creds.InGroup(s.owner.GID) { |
| p = s.perms.Group |
| } |
| if p.SupersetOf(req) { |
| return true |
| } |
| |
| // Tasks with CAP_IPC_OWNER may bypass permission checks. |
| return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, s.registry.userNS) |
| } |