[WASMFS] In-memory file - create, read, write (#15318)

Relevant Issue: #15041

- Implement new in-memory file class
- Expand coverage of open syscall
- Expand functionality of read and write to handle file offsets.
diff --git a/system/lib/wasmfs/file.cpp b/system/lib/wasmfs/file.cpp
new file mode 100644
index 0000000..b484a0e
--- /dev/null
+++ b/system/lib/wasmfs/file.cpp
@@ -0,0 +1,59 @@
+// Copyright 2021 The Emscripten Authors.  All rights reserved.
+// Emscripten is available under two separate licenses, the MIT license and the
+// University of Illinois/NCSA Open Source License.  Both these licenses can be
+// found in the LICENSE file.
+// This file defines the file object of the new file system.
+// Current Status: Work in Progress.
+// See https://github.com/emscripten-core/emscripten/issues/15041.
+
+#include "file.h"
+#include <cstring>
+
+namespace wasmfs {
+//
+// Directory
+//
+// Returns the entry stored under pathName, or nullptr when the directory has
+// no entry with that name.
+std::shared_ptr<File> Directory::Handle::getEntry(std::string pathName) {
+  auto it = getDir().entries.find(pathName);
+  if (it == getDir().entries.end()) {
+    return nullptr;
+  }
+  return it->second;
+}
+//
+// MemoryFile
+//
+// Copies len bytes from buf into the file contents starting at offset. The
+// backing buffer grows when the write extends past its current end, and size
+// is updated to match. A zero-length write leaves the file untouched.
+__wasi_errno_t MemoryFile::write(const uint8_t* buf, size_t len, off_t offset) {
+  // Return before touching the buffer: taking &buffer[offset] on an empty
+  // vector is undefined behaviour, and an empty write must not grow the file.
+  if (len == 0) {
+    return __WASI_ERRNO_SUCCESS;
+  }
+
+  if (offset + len > buffer.size()) {
+    buffer.resize(offset + len);
+    size = buffer.size();
+  }
+  std::memcpy(&buffer[offset], buf, len);
+
+  return __WASI_ERRNO_SUCCESS;
+}
+
+// Copies len bytes of file contents starting at offset into buf. The range is
+// not checked against the buffer size here.
+__wasi_errno_t MemoryFile::read(uint8_t* buf, size_t len, off_t offset) {
+  // Nothing to copy, so neither buf nor the buffer needs to be valid.
+  if (len == 0) {
+    return __WASI_ERRNO_SUCCESS;
+  }
+
+  std::memcpy(buf, &buffer[offset], len);
+
+  return __WASI_ERRNO_SUCCESS;
+}
+} // namespace wasmfs
diff --git a/system/lib/wasmfs/file.h b/system/lib/wasmfs/file.h
index 43ef016..9a49fc4 100644
--- a/system/lib/wasmfs/file.h
+++ b/system/lib/wasmfs/file.h
@@ -46,7 +46,6 @@
   }
 
   class Handle {
-
     std::unique_lock<std::mutex> lock;
 
   protected:
@@ -55,7 +54,7 @@
   public:
     Handle(std::shared_ptr<File> file) : file(file), lock(file->mutex) {}
     size_t& size() { return file->size; }
-    uint32_t& mode() { return file->mode; }
+    mode_t& mode() { return file->mode; }
     time_t& ctime() { return file->ctime; }
     time_t& mtime() { return file->mtime; }
     time_t& atime() { return file->atime; }
@@ -65,30 +64,33 @@
 
 protected:
   File(FileKind kind) : kind(kind) {}
+  File(FileKind kind, mode_t mode) : kind(kind), mode(mode) {}
   // A mutex is needed for multiple accesses to the same file.
   std::mutex mutex;
 
-private:
   size_t size = 0;
 
-  uint32_t mode = 0; // r/w/x modes
+  mode_t mode = 0; // User and group mode bits for access permission.
 
-  time_t ctime = 0; // Time when the file node was last modified
-  time_t mtime = 0; // Time when the file content was last modified
-  time_t atime = 0; // Time when the content was last accessed
+  time_t ctime = 0; // Time when the file node was last modified.
+  time_t mtime = 0; // Time when the file content was last modified.
+  time_t atime = 0; // Time when the content was last accessed.
 
   FileKind kind;
 };
 
 class DataFile : public File {
 
-  virtual __wasi_errno_t read(const uint8_t* buf, __wasi_size_t len) = 0;
-  virtual __wasi_errno_t write(const uint8_t* buf, __wasi_size_t len) = 0;
+  virtual __wasi_errno_t read(uint8_t* buf, size_t len, off_t offset) = 0;
+  virtual __wasi_errno_t
+  write(const uint8_t* buf, size_t len, off_t offset) = 0;
 
 public:
   static constexpr FileKind expectedKind = File::DataFileKind;
   DataFile() : File(File::DataFileKind) {}
+  DataFile(mode_t mode) : File(File::DataFileKind, mode) {}
   virtual ~DataFile() = default;
+
   class Handle : public File::Handle {
 
     DataFile& getFile() { return *file.get()->cast<DataFile>(); }
@@ -97,11 +99,11 @@
     Handle(std::shared_ptr<File> dataFile) : File::Handle(dataFile) {}
     Handle(Handle&&) = default;
 
-    __wasi_errno_t read(const uint8_t* buf, __wasi_size_t len) {
-      return getFile().read(buf, len);
+    __wasi_errno_t read(uint8_t* buf, size_t len, off_t offset) {
+      return getFile().read(buf, len, offset);
     }
-    __wasi_errno_t write(const uint8_t* buf, __wasi_size_t len) {
-      return getFile().write(buf, len);
+    __wasi_errno_t write(const uint8_t* buf, size_t len, off_t offset) {
+      return getFile().write(buf, len, offset);
     }
   };
 
@@ -116,20 +118,15 @@
 public:
   static constexpr FileKind expectedKind = File::DirectoryKind;
   Directory() : File(File::DirectoryKind) {}
+
   class Handle : public File::Handle {
     Directory& getDir() { return *file.get()->cast<Directory>(); }
 
   public:
     Handle(std::shared_ptr<File> directory) : File::Handle(directory) {}
 
-    std::shared_ptr<File> getEntry(std::string pathName) {
-      auto it = getDir().entries.find(pathName);
-      if (it == getDir().entries.end()) {
-        return nullptr;
-      } else {
-        return it->second;
-      }
-    }
+    std::shared_ptr<File> getEntry(std::string pathName);
+
     void setEntry(std::string pathName, std::shared_ptr<File> inserted) {
       getDir().entries[pathName] = inserted;
     }
@@ -147,4 +144,16 @@
   Handle locked() { return Handle(shared_from_this()); }
 };
 
+// This class describes a file that lives in Wasm Memory.
+class MemoryFile : public DataFile {
+  std::vector<uint8_t> buffer; // The file contents.
+  // Copies len bytes from buf into buffer at offset; defined in file.cpp.
+  __wasi_errno_t write(const uint8_t* buf, size_t len, off_t offset) override;
+  // Copies len bytes from buffer at offset into buf; defined in file.cpp.
+  __wasi_errno_t read(uint8_t* buf, size_t len, off_t offset) override;
+
+public:
+  MemoryFile(mode_t mode) : DataFile(mode) {} // mode is forwarded to DataFile.
+};
+
 } // namespace wasmfs
diff --git a/system/lib/wasmfs/file_table.cpp b/system/lib/wasmfs/file_table.cpp
index 27ee61a..7d7cec8 100644
--- a/system/lib/wasmfs/file_table.cpp
+++ b/system/lib/wasmfs/file_table.cpp
@@ -13,10 +13,10 @@
 std::vector<std::shared_ptr<OpenFileState>> FileTable::entries;
 
 static __wasi_errno_t writeStdBuffer(const uint8_t* buf,
-                                     __wasi_size_t len,
+                                     size_t len,
                                      void (*console_write)(const char*),
                                      std::vector<char>& fd_write_buffer) {
-  for (__wasi_size_t j = 0; j < len; j++) {
+  for (size_t j = 0; j < len; j++) {
     uint8_t current = buf[j];
     if (current == '\0' || current == '\n') {
       fd_write_buffer.push_back('\0'); // for null-terminated C strings
@@ -31,11 +31,11 @@
 
 class StdinFile : public DataFile {
 
-  __wasi_errno_t write(const uint8_t* buf, __wasi_size_t len) override {
+  __wasi_errno_t write(const uint8_t* buf, size_t len, off_t offset) override {
     return __WASI_ERRNO_INVAL;
   }
 
-  __wasi_errno_t read(const uint8_t* buf, __wasi_size_t len) override {
+  __wasi_errno_t read(uint8_t* buf, size_t len, off_t offset) override {
     return __WASI_ERRNO_INVAL;
   };
 
@@ -50,11 +50,11 @@
 class StdoutFile : public DataFile {
   std::vector<char> writeBuffer;
 
-  __wasi_errno_t write(const uint8_t* buf, __wasi_size_t len) override {
+  __wasi_errno_t write(const uint8_t* buf, size_t len, off_t offset) override {
     return writeStdBuffer(buf, len, &emscripten_console_log, writeBuffer);
   }
 
-  __wasi_errno_t read(const uint8_t* buf, __wasi_size_t len) override {
+  __wasi_errno_t read(uint8_t* buf, size_t len, off_t offset) override {
     return __WASI_ERRNO_INVAL;
   };
 
@@ -72,11 +72,11 @@
   // TODO: May not want to proxy stderr (fd == 2) to the main thread.
   // This will not show in HTML - a console.warn in a worker is sufficient.
   // This would be a change from the current FS.
-  __wasi_errno_t write(const uint8_t* buf, __wasi_size_t len) override {
+  __wasi_errno_t write(const uint8_t* buf, size_t len, off_t offset) override {
     return writeStdBuffer(buf, len, &emscripten_console_error, writeBuffer);
   }
 
-  __wasi_errno_t read(const uint8_t* buf, __wasi_size_t len) override {
+  __wasi_errno_t read(uint8_t* buf, size_t len, off_t offset) override {
     return __WASI_ERRNO_INVAL;
   };
 
@@ -90,11 +90,11 @@
 
 FileTable::FileTable() {
   entries.push_back(
-    std::make_shared<OpenFileState>(0, StdinFile::getSingleton()));
+    std::make_shared<OpenFileState>(0, O_RDONLY, StdinFile::getSingleton()));
   entries.push_back(
-    std::make_shared<OpenFileState>(0, StdoutFile::getSingleton()));
+    std::make_shared<OpenFileState>(0, O_WRONLY, StdoutFile::getSingleton()));
   entries.push_back(
-    std::make_shared<OpenFileState>(0, StderrFile::getSingleton()));
+    std::make_shared<OpenFileState>(0, O_WRONLY, StderrFile::getSingleton()));
 }
 
 // Initialize default directories including dev/stdin, dev/stdout, dev/stderr.
diff --git a/system/lib/wasmfs/file_table.h b/system/lib/wasmfs/file_table.h
index 1b2bb88..b616985 100644
--- a/system/lib/wasmfs/file_table.h
+++ b/system/lib/wasmfs/file_table.h
@@ -16,20 +16,38 @@
 #include <wasi/api.h>
 
 namespace wasmfs {
+static_assert(std::is_same<size_t, __wasi_size_t>::value,
+              "size_t should be the same as __wasi_size_t");
+static_assert(std::is_same<off_t, __wasi_filedelta_t>::value,
+              "off_t should be the same as __wasi_filedelta_t");
+
+// Tells whether a + b would go past the largest value T can hold. The sum is
+// never computed, since wrap-around is only defined for unsigned types.
+// Only a positive a is examined.
+template<typename T> bool addWillOverFlow(T a, T b) {
+  // && short-circuits, so the headroom max() - a is taken only when a > 0.
+  return a > 0 && b > std::numeric_limits<T>::max() - a;
+}
+
+// Access mode, file creation and file status flags for open.
+using wasmfs_oflags_t = uint32_t;
 
 std::shared_ptr<Directory> getRootDirectory();
 
 class OpenFileState : public std::enable_shared_from_this<OpenFileState> {
   std::shared_ptr<File> file;
-  __wasi_filedelta_t offset;
+  off_t position;
+  wasmfs_oflags_t flags; // RD_ONLY, WR_ONLY, RDWR
   // An OpenFileState needs a mutex if there are concurrent accesses on one open
   // file descriptor. This could occur if there are multiple seeks on the same
   // open file descriptor.
   std::mutex mutex;
 
 public:
-  OpenFileState(uint32_t offset, std::shared_ptr<File> file)
-    : offset(offset), file(file) {}
+  OpenFileState(size_t position,
+                wasmfs_oflags_t flags,
+                std::shared_ptr<File> file)
+    : position(position), flags(flags), file(file) {}
 
   class Handle {
     std::shared_ptr<OpenFileState> openFileState;
@@ -40,6 +58,8 @@
       : openFileState(openFileState), lock(openFileState->mutex) {}
 
     std::shared_ptr<File>& getFile() { return openFileState->file; };
+
+    off_t& position() { return openFileState->position; };
   };
 
   Handle get() { return Handle(shared_from_this()); }
diff --git a/system/lib/wasmfs/wasmfs.cpp b/system/lib/wasmfs/wasmfs.cpp
index 4c09866..5ab6b42 100644
--- a/system/lib/wasmfs/wasmfs.cpp
+++ b/system/lib/wasmfs/wasmfs.cpp
@@ -61,31 +61,112 @@
                                const __wasi_ciovec_t* iovs,
                                size_t iovs_len,
                                __wasi_size_t* nwritten) {
+  if (iovs_len < 0) {
+    return __WASI_ERRNO_INVAL;
+  }
+
   auto openFile = FileTable::get()[fd];
 
   if (!openFile) {
     return __WASI_ERRNO_BADF;
   }
 
-  auto file = openFile.locked().getFile()->dynCast<DataFile>();
+  auto lockedOpenFile = openFile.locked();
+  auto* file = lockedOpenFile.getFile()->dynCast<DataFile>();
 
   // If file is nullptr, then the file was not a DataFile.
+  // TODO: change to add support for symlinks.
   if (!file) {
     return __WASI_ERRNO_ISDIR;
   }
 
   auto lockedFile = file->locked();
 
-  __wasi_size_t num = 0;
+  off_t offset = lockedOpenFile.position();
   for (size_t i = 0; i < iovs_len; i++) {
     const uint8_t* buf = iovs[i].buf;
-    __wasi_size_t len = iovs[i].buf_len;
+    size_t len = iovs[i].buf_len;
 
-    lockedFile.write(buf, len);
-    num += len;
+    // Check if the sum of the buf_len values overflows an off_t (63 bits).
+    if (addWillOverFlow(offset, off_t(len))) {
+      return __WASI_ERRNO_FBIG;
+    }
+
+    // Check if buf_len specifies a positive length buffer but buf is a
+    // null pointer
+    if (!buf && len > 0) {
+      return __WASI_ERRNO_INVAL;
+    }
+
+    auto result = lockedFile.write(buf, len, offset);
+
+    if (result != __WASI_ERRNO_SUCCESS) {
+      *nwritten = offset - lockedOpenFile.position();
+      lockedOpenFile.position() = offset;
+      return result;
+    }
+    offset += len;
   }
-  *nwritten = num;
+  *nwritten = offset - lockedOpenFile.position();
+  lockedOpenFile.position() = offset;
+  return __WASI_ERRNO_SUCCESS;
+}
 
+// Reads from the open file fd into the iovs_len buffers of iovs, starting at
+// the descriptor's current position and stopping at the end of the file data.
+// On success the byte count is stored in *nread and the position advances by
+// it. iovs_len is unsigned, so no check for a negative count is needed.
+__wasi_errno_t __wasi_fd_read(__wasi_fd_t fd,
+                              const __wasi_iovec_t* iovs,
+                              size_t iovs_len,
+                              __wasi_size_t* nread) {
+  auto openFile = FileTable::get()[fd];
+
+  if (!openFile) {
+    return __WASI_ERRNO_BADF;
+  }
+
+  auto lockedOpenFile = openFile.locked();
+  auto* file = lockedOpenFile.getFile()->dynCast<DataFile>();
+
+  // If file is nullptr, then the file was not a DataFile.
+  // TODO: change to add support for symlinks.
+  if (!file) {
+    return __WASI_ERRNO_ISDIR;
+  }
+
+  auto lockedFile = file->locked();
+
+  off_t offset = lockedOpenFile.position();
+  size_t size = lockedFile.size();
+  for (size_t i = 0; i < iovs_len; i++) {
+    // Check if offset has exceeded the size of file data.
+    ssize_t dataLeft = size - offset;
+    if (dataLeft <= 0) {
+      break;
+    }
+
+    uint8_t* buf = iovs[i].buf;
+
+    // Check if buf_len specifies a positive length buffer
+    // but buf is a null pointer.
+    if (!buf && iovs[i].buf_len > 0) {
+      return __WASI_ERRNO_INVAL;
+    }
+
+    size_t bytesToRead = std::min(size_t(dataLeft), iovs[i].buf_len);
+
+    auto result = lockedFile.read(buf, bytesToRead, offset);
+
+    if (result != __WASI_ERRNO_SUCCESS) {
+      *nread = offset - lockedOpenFile.position();
+      lockedOpenFile.position() = offset;
+      return result;
+    }
+    offset += bytesToRead;
+  }
+  *nread = offset - lockedOpenFile.position();
+  lockedOpenFile.position() = offset;
   return __WASI_ERRNO_SUCCESS;
 }
 
@@ -153,37 +234,6 @@
   return __WASI_ERRNO_SUCCESS;
 }
 
-__wasi_errno_t __wasi_fd_read(__wasi_fd_t fd,
-                              const __wasi_iovec_t* iovs,
-                              size_t iovs_len,
-                              __wasi_size_t* nread) {
-  auto openFile = FileTable::get()[fd];
-
-  if (!openFile) {
-    return __WASI_ERRNO_BADF;
-  }
-
-  auto file = openFile.locked().getFile()->dynCast<DataFile>();
-
-  // If file is nullptr, then the file was not a DataFile.
-  if (!file) {
-    return __WASI_ERRNO_ISDIR;
-  }
-
-  auto lockedFile = file->locked();
-
-  __wasi_size_t num = 0;
-  for (size_t i = 0; i < iovs_len; i++) {
-    const uint8_t* buf = iovs[i].buf;
-    __wasi_size_t len = iovs[i].buf_len;
-
-    lockedFile.read(buf, len);
-    num += len;
-  }
-  *nread = num;
-  return __WASI_ERRNO_INVAL;
-}
-
 __wasi_fd_t __syscall_open(long pathname, long flags, long mode) {
   int accessMode = (flags & O_ACCMODE);
   bool canWrite = false;
@@ -192,6 +242,10 @@
     canWrite = true;
   }
 
+  // TODO: remove assert when all functionality is complete.
+  assert(((flags) & ~(O_CREAT | O_EXCL | O_DIRECTORY | O_TRUNC | O_APPEND |
+                      O_RDWR | O_WRONLY | O_RDONLY | O_LARGEFILE)) == 0);
+
   std::vector<std::string> pathParts;
 
   char newPathName[strlen((char*)pathname) + 1];
@@ -208,8 +262,8 @@
   }
 
   std::shared_ptr<File> curr = getRootDirectory();
-  for (int i = 0; i < pathParts.size(); i++) {
 
+  for (int i = 0; i < pathParts.size(); i++) {
     auto directory = curr->dynCast<Directory>();
 
     // If file is nullptr, then the file was not a Directory.
@@ -224,9 +278,22 @@
 #endif
     curr = directory->locked().getEntry(pathParts[i]);
 
-    // Requested entry (file or directory
+    // Requested entry (file or directory)
     if (!curr) {
-      return -(ENOENT);
+      // Create last element in path if O_CREAT is specified.
+      if (i == pathParts.size() - 1 && flags & O_CREAT) {
+        auto lockedDir = directory->locked();
+
+        // Create an empty in-memory file.
+        auto created = std::make_shared<MemoryFile>(mode);
+
+        lockedDir.setEntry(pathParts[i], created);
+        auto openFile = std::make_shared<OpenFileState>(0, flags, created);
+
+        return FileTable::get().add(openFile);
+      } else {
+        return -(ENOENT);
+      }
     }
 
 #ifdef WASMFS_DEBUG
@@ -235,7 +302,17 @@
 #endif
   }
 
-  auto openFile = std::make_shared<OpenFileState>(0, curr);
+  // Fail if O_DIRECTORY is specified and pathname is not a directory
+  if (flags & O_DIRECTORY && !curr->is<Directory>()) {
+    return -(ENOTDIR);
+  }
+
+  // Return an error if the file exists and O_CREAT and O_EXCL are specified.
+  if (flags & O_EXCL && flags & O_CREAT) {
+    return -(EEXIST);
+  }
+
+  auto openFile = std::make_shared<OpenFileState>(0, flags, curr);
 
   return FileTable::get().add(openFile);
 }
diff --git a/tests/test_other.py b/tests/test_other.py
index d4faf98..28840f6 100644
--- a/tests/test_other.py
+++ b/tests/test_other.py
@@ -11154,3 +11154,7 @@
   def test_wasmfs_unistd_fstat(self):
     self.set_setting('WASMFS')
     self.do_run_in_out_file_test('wasmfs/wasmfs_fstat.c')
+
+  def test_wasmfs_unistd_create(self):
+    self.set_setting('WASMFS')
+    self.do_run_in_out_file_test('wasmfs/wasmfs_create.c')
diff --git a/tests/wasmfs/wasmfs_create.c b/tests/wasmfs/wasmfs_create.c
new file mode 100644
index 0000000..974bf6a
--- /dev/null
+++ b/tests/wasmfs/wasmfs_create.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2021 The Emscripten Authors.  All rights reserved.
+ * Emscripten is available under two separate licenses, the MIT license and the
+ * University of Illinois/NCSA Open Source License.  Both these licenses can be
+ * found in the LICENSE file.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+// FIXME: Merge with other existing close and open tests.
+
+int main() {
+  // Test creating a new file and writing and reading from it.
+  errno = 0;
+  int fd = open("/test", O_RDWR | O_CREAT, 0777); // O_CREAT requires a mode.
+  assert(errno == 0);
+  const char msg[64] = "Test\n"; // Zero padded for the oversized writes.
+  errno = 0;
+  write(fd, msg, strlen(msg));
+  assert(errno == 0);
+  // Attempt to open another FD to the file just created.
+  errno = 0;
+  int test = open("/test", O_RDWR);
+  assert(errno == 0);
+  char buf[100] = {};
+  errno = 0;
+  read(test, buf, sizeof(buf));
+  assert(errno == 0);
+  printf("%s", buf);
+  close(fd);
+  close(test);
+
+  // Try to create an existing file with O_EXCL and O_CREAT.
+  errno = 0;
+  int fd2 = open("/dev/stdin", O_RDWR | O_CREAT | O_EXCL, 0777);
+  printf("Errno: %s\n", strerror(errno));
+  assert(errno == EEXIST);
+
+  // Try to open a file with O_DIRECTORY.
+  errno = 0;
+  int fd3 = open("/dev/stdin", O_RDWR | O_DIRECTORY);
+  printf("Errno: %s\n", strerror(errno));
+  assert(errno == ENOTDIR);
+
+  // Try to open a directory with O_DIRECTORY.
+  errno = 0;
+  int fd4 = open("/dev", O_RDONLY | O_DIRECTORY);
+  printf("Errno: %s\n", strerror(errno));
+  assert(errno == 0);
+
+  // Test zero size reads and writes.
+  char buf2[100] = {};
+  int fd5 = open("/newFile", O_RDWR | O_CREAT, 0777);
+  errno = 0;
+  printf("Read %zi bytes\n", read(fd5, buf2, 0));
+  assert(errno == 0);
+  printf("Wrote %zi bytes\n", write(fd5, msg, 0));
+  assert(errno == 0);
+
+  // Test writes longer than the message text, padded with msg's zero bytes.
+  char buf3[100] = {};
+  int fd6 = open("/testFile", O_RDWR | O_CREAT, 0777);
+  errno = 0;
+  printf("Wrote %zi bytes\n", write(fd6, msg, strlen(msg) + 20));
+  printf("Wrote %zi bytes\n", write(fd6, msg, strlen(msg)));
+  printf("Wrote %zi bytes\n", write(fd6, msg, strlen(msg) + 30));
+  printf("Read %zi bytes\n", read(fd6, buf, sizeof(buf)));
+
+  int fd7 = open("/testFile", O_RDWR);
+  assert(errno == 0);
+  printf("Read %zi bytes\n", read(fd7, buf3, sizeof buf3));
+  printf("File contents: %s", buf3);
+  assert(errno == 0);
+
+  // TODO: use seek to test out of bounds read.
+
+  return 0;
+}
diff --git a/tests/wasmfs/wasmfs_create.out b/tests/wasmfs/wasmfs_create.out
new file mode 100644
index 0000000..2e6d9ee
--- /dev/null
+++ b/tests/wasmfs/wasmfs_create.out
@@ -0,0 +1,12 @@
+Test
+Errno: File exists
+Errno: Not a directory
+Errno: No error information
+Read 0 bytes
+Wrote 0 bytes
+Wrote 25 bytes
+Wrote 5 bytes
+Wrote 35 bytes
+Read 0 bytes
+Read 65 bytes
+File contents: Test
diff --git a/tools/system_libs.py b/tools/system_libs.py
index dabaa4e..68c9ed9 100644
--- a/tools/system_libs.py
+++ b/tools/system_libs.py
@@ -1292,7 +1292,7 @@
   def get_files(self):
     return files_in_path(
         path='system/lib/wasmfs',
-        filenames=['wasmfs.cpp', 'file_table.cpp'])
+        filenames=['wasmfs.cpp', 'file_table.cpp', 'file.cpp'])
 
   def can_build(self):
     return settings.WASMFS