[clang-format] Proposal for clang-format to give compiler style warnings

Summary:
Related somewhat to {D29039}

On seeing a quote on twitter by @invalidop

> If it's not formatted with clang-format it's a build error.

This made me want to change the way I use clang-format into a tool that could optionally show me where my source code violates clang-format syle.

When I'm making a change to clang-format itself, one thing I like to do to test the change is to ensure I didn't cause a huge wave of changes, what I want to do is simply run this on a known formatted directory and see if any new differences arrive in a manner I'm used to.

This started me thinking that we should allow build systems to run clang-format on a whole tree and emit compiler style warnings about files that fail clang-format in a form that would make them as a warning in most build systems and because those build systems range in their construction I don't think its unreasonable to NOT expect them to have to do the directory searching or parsing the output replacements themselves, but simply transform that into an error code when there are changes required.

I am starting this by suggesing adding a -n or -dry-run command line argument which would emit a warning/error of the form

Support for various common compiler command line argumuments like '-Werror' and '-ferror-limit' could make this very flexible to be integrated into build systems and CI systems.

```
> $ /usr/bin/clang-format --dry-run ClangFormat.cpp -ferror-limit=3 -fcolor-diagnostics
> ClangFormat.cpp:54:29: warning: code should be clang-formatted [-Wclang-format-violations]
> static cl::list<std::string>
>                             ^
> ClangFormat.cpp:55:20: warning: code should be clang-formatted [-Wclang-format-violations]
> LineRanges("lines", cl::desc("<start line>:<end line> - format a range of\n"
>                    ^
> ClangFormat.cpp:55:77: warning: code should be clang-formatted [-Wclang-format-violations]
> LineRanges("lines", cl::desc("<start line>:<end line> - format a range of\n"
>                                                                             ^
```

Reviewers: mitchell-stellar, klimek, owenpan

Reviewed By: klimek

Subscribers: mgorny, cfe-commits

Tags: #clang-format, #clang-tools-extra, #clang

Differential Revision: https://reviews.llvm.org/D68554

git-svn-id: svn://svn.chromium.org/llvm-project/cfe/trunk/tools/clang-format@374663 0b72dbe1-c17f-4bc7-b9db-2b4152be0356
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 35ecdb1..28ac4fb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,6 +7,7 @@
 set(CLANG_FORMAT_LIB_DEPS
   clangBasic
   clangFormat
+  clangFrontend
   clangRewrite
   clangToolingCore
   )
diff --git a/ClangFormat.cpp b/ClangFormat.cpp
index f2d57ee..f39c18b 100644
--- a/ClangFormat.cpp
+++ b/ClangFormat.cpp
@@ -18,6 +18,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Version.h"
 #include "clang/Format/Format.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/Rewrite/Core/Rewriter.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
@@ -108,6 +109,54 @@
     Verbose("verbose", cl::desc("If set, shows the list of processed files"),
             cl::cat(ClangFormatCategory));
 
+// Use --dry-run to match other LLVM tools when you mean do it but don't
+// actually do it
+static cl::opt<bool>
+    DryRun("dry-run",
+           cl::desc("If set, do not actually make the formatting changes"),
+           cl::cat(ClangFormatCategory));
+
+// Use -n as a common command as an alias for --dry-run. (git and make use -n)
+static cl::alias DryRunShort("n", cl::desc("Alias for --dry-run"),
+                             cl::cat(ClangFormatCategory), cl::aliasopt(DryRun),
+                             cl::NotHidden);
+
+// Emulate being able to turn on/off the warning.
+static cl::opt<bool>
+    WarnFormat("Wclang-format-violations",
+               cl::desc("Warnings about individual formatting changes needed. "
+                        "Used only with --dry-run or -n"),
+               cl::init(true), cl::cat(ClangFormatCategory), cl::Hidden);
+
+static cl::opt<bool>
+    NoWarnFormat("Wno-clang-format-violations",
+                 cl::desc("Do not warn about individual formatting changes "
+                          "needed. Used only with --dry-run or -n"),
+                 cl::init(false), cl::cat(ClangFormatCategory), cl::Hidden);
+
+static cl::opt<unsigned> ErrorLimit(
+    "ferror-limit",
+    cl::desc("Set the maximum number of clang-format errors to emit before "
+             "stopping (0 = no limit). Used only with --dry-run or -n"),
+    cl::init(0), cl::cat(ClangFormatCategory));
+
+static cl::opt<bool>
+    WarningsAsErrors("Werror",
+                     cl::desc("If set, changes formatting warnings to errors"),
+                     cl::cat(ClangFormatCategory));
+
+static cl::opt<bool>
+    ShowColors("fcolor-diagnostics",
+               cl::desc("If set, and on a color-capable terminal controls "
+                        "whether or not to print diagnostics in color"),
+               cl::init(true), cl::cat(ClangFormatCategory), cl::Hidden);
+
+static cl::opt<bool>
+    NoShowColors("fno-color-diagnostics",
+                 cl::desc("If set, and on a color-capable terminal controls "
+                          "whether or not to print diagnostics in color"),
+                 cl::init(false), cl::cat(ClangFormatCategory), cl::Hidden);
+
 static cl::list<std::string> FileNames(cl::Positional, cl::desc("[<file> ...]"),
                                        cl::cat(ClangFormatCategory));
 
@@ -241,6 +290,95 @@
   }
 }
 
+// If BufStr has an invalid BOM, returns the BOM name; otherwise, returns
+// nullptr.
+static const char *getInValidBOM(StringRef BufStr) {
+  // Check to see if the buffer has a UTF Byte Order Mark (BOM).
+  // We only support UTF-8 with and without a BOM right now.  See
+  // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding
+  // for more information.
+  const char *InvalidBOM =
+      llvm::StringSwitch<const char *>(BufStr)
+          .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
+                      "UTF-32 (BE)")
+          .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
+                      "UTF-32 (LE)")
+          .StartsWith("\xFE\xFF", "UTF-16 (BE)")
+          .StartsWith("\xFF\xFE", "UTF-16 (LE)")
+          .StartsWith("\x2B\x2F\x76", "UTF-7")
+          .StartsWith("\xF7\x64\x4C", "UTF-1")
+          .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
+          .StartsWith("\x0E\xFE\xFF", "SCSU")
+          .StartsWith("\xFB\xEE\x28", "BOCU-1")
+          .StartsWith("\x84\x31\x95\x33", "GB-18030")
+          .Default(nullptr);
+  return InvalidBOM;
+}
+
+static bool
+emitReplacementWarnings(const Replacements &Replaces, StringRef AssumedFileName,
+                        const std::unique_ptr<llvm::MemoryBuffer> &Code) {
+  if (Replaces.empty()) {
+    return false;
+  }
+
+  IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
+  DiagOpts->ShowColors = (ShowColors && !NoShowColors);
+
+  TextDiagnosticPrinter *DiagsBuffer =
+      new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts, false);
+
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticsEngine> Diags(
+      new DiagnosticsEngine(DiagID, &*DiagOpts, DiagsBuffer));
+
+  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
+      new llvm::vfs::InMemoryFileSystem);
+  FileManager Files(FileSystemOptions(), InMemoryFileSystem);
+  SourceManager Sources(*Diags, Files);
+  FileID FileID = createInMemoryFile(AssumedFileName, Code.get(), Sources,
+                                     Files, InMemoryFileSystem.get());
+
+  const unsigned ID = Diags->getCustomDiagID(
+      WarningsAsErrors ? clang::DiagnosticsEngine::Error
+                       : clang::DiagnosticsEngine::Warning,
+      "code should be clang-formatted [-Wclang-format-violations]");
+
+  unsigned Errors = 0;
+  DiagsBuffer->BeginSourceFile(LangOptions(), nullptr);
+  if (WarnFormat && !NoWarnFormat) {
+    for (const auto &R : Replaces) {
+      Diags->Report(
+          Sources.getLocForStartOfFile(FileID).getLocWithOffset(R.getOffset()),
+          ID);
+      Errors++;
+      if (ErrorLimit && Errors >= ErrorLimit)
+        break;
+    }
+  }
+  DiagsBuffer->EndSourceFile();
+  return WarningsAsErrors;
+}
+
+static void outputXML(const Replacements &Replaces,
+                      const Replacements &FormatChanges,
+                      const FormattingAttemptStatus &Status,
+                      const cl::opt<unsigned> &Cursor,
+                      unsigned CursorPosition) {
+  outs() << "<?xml version='1.0'?>\n<replacements "
+            "xml:space='preserve' incomplete_format='"
+         << (Status.FormatComplete ? "false" : "true") << "'";
+  if (!Status.FormatComplete)
+    outs() << " line='" << Status.Line << "'";
+  outs() << ">\n";
+  if (Cursor.getNumOccurrences() != 0)
+    outs() << "<cursor>" << FormatChanges.getShiftedCodePosition(CursorPosition)
+           << "</cursor>\n";
+
+  outputReplacementsXML(Replaces);
+  outs() << "</replacements>\n";
+}
+
 // Returns true on error.
 static bool format(StringRef FileName) {
   if (!OutputXML && Inplace && FileName == "-") {
@@ -260,26 +398,9 @@
   if (Code->getBufferSize() == 0)
     return false; // Empty files are formatted correctly.
 
-  // Check to see if the buffer has a UTF Byte Order Mark (BOM).
-  // We only support UTF-8 with and without a BOM right now.  See
-  // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding
-  // for more information.
   StringRef BufStr = Code->getBuffer();
-  const char *InvalidBOM =
-      llvm::StringSwitch<const char *>(BufStr)
-          .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
-                      "UTF-32 (BE)")
-          .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
-                      "UTF-32 (LE)")
-          .StartsWith("\xFE\xFF", "UTF-16 (BE)")
-          .StartsWith("\xFF\xFE", "UTF-16 (LE)")
-          .StartsWith("\x2B\x2F\x76", "UTF-7")
-          .StartsWith("\xF7\x64\x4C", "UTF-1")
-          .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
-          .StartsWith("\x0E\xFE\xFF", "SCSU")
-          .StartsWith("\xFB\xEE\x28", "BOCU-1")
-          .StartsWith("\x84\x31\x95\x33", "GB-18030")
-          .Default(nullptr);
+
+  const char *InvalidBOM = getInValidBOM(BufStr);
 
   if (InvalidBOM) {
     errs() << "error: encoding with unsupported byte order mark \""
@@ -318,20 +439,12 @@
   Replacements FormatChanges =
       reformat(*FormatStyle, *ChangedCode, Ranges, AssumedFileName, &Status);
   Replaces = Replaces.merge(FormatChanges);
-  if (OutputXML) {
-    outs() << "<?xml version='1.0'?>\n<replacements "
-              "xml:space='preserve' incomplete_format='"
-           << (Status.FormatComplete ? "false" : "true") << "'";
-    if (!Status.FormatComplete)
-      outs() << " line='" << Status.Line << "'";
-    outs() << ">\n";
-    if (Cursor.getNumOccurrences() != 0)
-      outs() << "<cursor>"
-             << FormatChanges.getShiftedCodePosition(CursorPosition)
-             << "</cursor>\n";
-
-    outputReplacementsXML(Replaces);
-    outs() << "</replacements>\n";
+  if (OutputXML || DryRun) {
+    if (DryRun) {
+      return emitReplacementWarnings(Replaces, AssumedFileName, Code);
+    } else {
+      outputXML(Replaces, FormatChanges, Status, Cursor, CursorPosition);
+    }
   } else {
     IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
         new llvm::vfs::InMemoryFileSystem);
@@ -370,6 +483,38 @@
   OS << clang::getClangToolFullVersion("clang-format") << '\n';
 }
 
+// Dump the configuration.
+static int dumpConfig() {
+  StringRef FileName;
+  std::unique_ptr<llvm::MemoryBuffer> Code;
+  if (FileNames.empty()) {
+    // We can't read the code to detect the language if there's no
+    // file name, so leave Code empty here.
+    FileName = AssumeFileName;
+  } else {
+    // Read in the code in case the filename alone isn't enough to
+    // detect the language.
+    ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
+        MemoryBuffer::getFileOrSTDIN(FileNames[0]);
+    if (std::error_code EC = CodeOrErr.getError()) {
+      llvm::errs() << EC.message() << "\n";
+      return 1;
+    }
+    FileName = (FileNames[0] == "-") ? AssumeFileName : FileNames[0];
+    Code = std::move(CodeOrErr.get());
+  }
+  llvm::Expected<clang::format::FormatStyle> FormatStyle =
+      clang::format::getStyle(Style, FileName, FallbackStyle,
+                              Code ? Code->getBuffer() : "");
+  if (!FormatStyle) {
+    llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n";
+    return 1;
+  }
+  std::string Config = clang::format::configurationAsText(*FormatStyle);
+  outs() << Config << "\n";
+  return 0;
+}
+
 int main(int argc, const char **argv) {
   llvm::InitLLVM X(argc, argv);
 
@@ -391,34 +536,7 @@
   }
 
   if (DumpConfig) {
-    StringRef FileName;
-    std::unique_ptr<llvm::MemoryBuffer> Code;
-    if (FileNames.empty()) {
-      // We can't read the code to detect the language if there's no
-      // file name, so leave Code empty here.
-      FileName = AssumeFileName;
-    } else {
-      // Read in the code in case the filename alone isn't enough to
-      // detect the language.
-      ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
-          MemoryBuffer::getFileOrSTDIN(FileNames[0]);
-      if (std::error_code EC = CodeOrErr.getError()) {
-        llvm::errs() << EC.message() << "\n";
-        return 1;
-      }
-      FileName = (FileNames[0] == "-") ? AssumeFileName : FileNames[0];
-      Code = std::move(CodeOrErr.get());
-    }
-    llvm::Expected<clang::format::FormatStyle> FormatStyle =
-        clang::format::getStyle(Style, FileName, FallbackStyle,
-                                Code ? Code->getBuffer() : "");
-    if (!FormatStyle) {
-      llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n";
-      return 1;
-    }
-    std::string Config = clang::format::configurationAsText(*FormatStyle);
-    outs() << Config << "\n";
-    return 0;
+    return dumpConfig();
   }
 
   bool Error = false;