From e84c8031d6283535f7c393a95289aaebf7ef6280 Mon Sep 17 00:00:00 2001
From: boB Rudis <bob@rud.is>
Date: Wed, 18 Oct 2017 22:25:05 -0400
Subject: [PATCH] ndjson prep

---
 paused.conf      |  32 ++++++++++
 src/main-conf.c  |   9 ++-
 src/masscan.h    |   3 +-
 src/out-ndjson.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++
 src/output.c     |   7 ++-
 src/output.h     |   1 +
 src/smack1.c     |   2 +
 7 files changed, 203 insertions(+), 5 deletions(-)
 create mode 100644 paused.conf
 create mode 100644 src/out-ndjson.c

diff --git a/paused.conf b/paused.conf
new file mode 100644
index 0000000..025f056
--- /dev/null
+++ b/paused.conf
@@ -0,0 +1,32 @@
+
+# resume information
+resume-index = 1
+rate =     100.00
+randomize-hosts = true
+seed = 1568398576484847982
+shard = 1/1
+# ADAPTER SETTINGS
+adapter = 
+adapter-ip = 10.1.10.196
+adapter-mac = 00:00:00:00:00:00
+router-mac = 00:00:00:00:00:00
+# OUTPUT/REPORTING SETTINGS
+output-format = unknown(0)
+show = open,,
+output-filename = 
+rotate = 0
+rotate-dir = .
+rotate-offset = 0
+rotate-filesize = 0
+pcap = 
+# TARGET SELECTION (IP, PORTS, EXCLUDES)
+retries = 0
+ports = 80
+range = 10.1.10.0/24
+
+capture = cert
+nocapture = html
+nocapture = heartbleed
+nocapture = ticketbleed
+
+min-packet = 60
diff --git a/src/main-conf.c b/src/main-conf.c
index d533928..100b1d9 100644
--- a/src/main-conf.c
+++ b/src/main-conf.c
@@ -179,10 +179,10 @@ print_nmap_help(void)
 "  --ttl <val>: Set IP time-to-live field\n"
 "  --spoof-mac <mac address/prefix/vendor name>: Spoof your MAC address\n"
 "OUTPUT:\n"
-"  --output-format <format>: Sets output to binary/list/unicornscan/json/grepable/xml\n"
+"  --output-format <format>: Sets output to binary/list/unicornscan/json/ndjson/grepable/xml\n"
 "  --output-file <file>: Write scan results to file. If --output-format is\n"
 "     not given default is xml\n"
-"  -oL/-oJ/-oG/-oB/-oX/-oU <file>: Output scan in List/JSON/Grepable/Binary/XML/Unicornscan format,\n"
+"  -oL/-oJ/-oD/-oG/-oB/-oX/-oU <file>: Output scan in List/JSON/nDjson/Grepable/Binary/XML/Unicornscan format,\n"
 "     respectively, to the given filename. Shortcut for\n"
 "     --output-format <format> --output-file <file>\n"
 "  -v: Increase verbosity level (use -vv or more for greater effect)\n"
@@ -328,6 +328,7 @@ masscan_echo(struct Masscan *masscan, FILE *fp)
     case Output_Binary:     fprintf(fp, "output-format = binary\n"); break;
     case Output_Grepable:   fprintf(fp, "output-format = grepable\n"); break;
     case Output_JSON:       fprintf(fp, "output-format = json\n"); break;
+    case Output_NDJSON:     fprintf(fp, "output-format = ndjson\n"); break;
     case Output_Certs:      fprintf(fp, "output-format = certs\n"); break;
     case Output_None:       fprintf(fp, "output-format = none\n"); break;
     case Output_Redis:
@@ -1493,6 +1494,7 @@ masscan_set_parameter(struct Masscan *masscan,
         else if (EQUALS("greppable", value))    x = Output_Grepable;
         else if (EQUALS("grepable", value))     x = Output_Grepable;
         else if (EQUALS("json", value))         x = Output_JSON;
+        else if (EQUALS("ndjson", value))       x = Output_NDJSON;
         else if (EQUALS("certs", value))        x = Output_Certs;
         else if (EQUALS("none", value))         x = Output_None;
         else if (EQUALS("redis", value))        x = Output_Redis;
@@ -1965,6 +1967,9 @@ masscan_command_line(struct Masscan *masscan, int argc, char *argv[])
                 case 'B':
                     masscan->output.format = Output_Binary;
                     break;
+                case 'D':
+                    masscan->output.format = Output_NDJSON;
+                    break;
                 case 'J':
                     masscan->output.format = Output_JSON;
                     break;
diff --git a/src/masscan.h b/src/masscan.h
index 41dccf5..124b812 100644
--- a/src/masscan.h
+++ b/src/masscan.h
@@ -47,6 +47,7 @@ enum OutputFormat {
     Output_Binary       = 0x0004,   /* -oB, "binary", the primary format */
     Output_XML          = 0x0008,   /* -oX, "xml" */
     Output_JSON         = 0x0010,   /* -oJ, "json" */
+    Output_NDJSON       = 0x0011,   /* -oD, "ndjson" */
     Output_Nmap         = 0x0020,
     Output_ScriptKiddie = 0x0040,
     Output_Grepable     = 0x0080,   /* -oG, "grepable" */
@@ -240,7 +241,7 @@ struct Masscan
         
         /**
          * --output-format
-         * Examples are "xml", "binary", "json", "grepable", and so on.
+         * Examples are "xml", "binary", "json", "ndjson", "grepable", and so on.
          */
         enum OutputFormat format;
         
diff --git a/src/out-ndjson.c b/src/out-ndjson.c
new file mode 100644
index 0000000..bd785c3
--- /dev/null
+++ b/src/out-ndjson.c
@@ -0,0 +1,154 @@
+#include "output.h"
+#include "masscan-app.h"
+#include "masscan-status.h"
+#include "string_s.h"
+#include <ctype.h>
+
+/****************************************************************************
+ * NDJSON has no file header (each record is a complete line): write nothing.
+ ****************************************************************************/
+static void
+ndjson_out_open(struct Output *out, FILE *fp)
+{
+    UNUSEDPARM(out);
+    UNUSEDPARM(fp);
+}
+
+/****************************************************************************
+ * NDJSON has no file trailer (no closing bracket or footer): write nothing.
+ ****************************************************************************/
+static void
+ndjson_out_close(struct Output *out, FILE *fp)
+{
+    UNUSEDPARM(out);
+    UNUSEDPARM(fp);
+}
+
+/****************************************************************************
+ * Writes one port-status record as a single line of JSON, for example
+ * (wrapped here for readability; the real record contains no line breaks):
+ *   {"ip":"124.53.139.201","timestamp":"1508379905","ports":[{"port":443,
+ *    "proto":"tcp","status":"open","reason":"syn-ack","ttl":48}]}
+ * The timestamp is cast to int and written as a quoted string.
+ ****************************************************************************/
+static void
+ndjson_out_status(struct Output *out, FILE *fp, time_t timestamp, int status,
+                 unsigned ip, unsigned ip_proto, unsigned port, unsigned reason, unsigned ttl)
+{
+    char reason_text[128];  /* scratch space handed to reason_string() */
+
+    UNUSEDPARM(out);
+
+    /* Emit the complete record, trailing newline included, in one call. */
+    fprintf(fp,
+            "{\"ip\":\"%u.%u.%u.%u\","
+            "\"timestamp\":\"%d\",\"ports\":[{\"port\":%u,\"proto\":\"%s\","
+            "\"status\":\"%s\",\"reason\":\"%s\",\"ttl\":%u}]}\n",
+            (ip >> 24) & 0xFF, (ip >> 16) & 0xFF, (ip >> 8) & 0xFF, ip & 0xFF,
+            (int)timestamp, port,
+            name_from_ip_proto(ip_proto),
+            status_string(status),
+            reason_string(reason, reason_text, sizeof(reason_text)),
+            ttl);
+}
+
+/*****************************************************************************
+ * Encode a banner into `buf` so it is safe inside a JSON string literal:
+ * printable bytes are copied, except < > & " ' and backslash, which (like
+ * all non-printable bytes) become \u00XX. Encoding stops at the first byte
+ * that does not fit. Returns `buf`, or "" when `buf_len` is zero. Kept apart
+ * from the sister `normalize_json_string` so the two can diverge if needed.
+ *****************************************************************************/
+static const char *
+normalize_ndjson_string(const unsigned char *px, size_t length,
+                       char *buf, size_t buf_len)
+{
+    static const char hex[] = "0123456789abcdef";
+    size_t i;
+    size_t offset = 0;
+
+    if (buf_len == 0)
+        return "";      /* no room even for the terminator */
+
+    for (i=0; i<length; i++) {
+        unsigned char c = px[i];
+        if (isprint(c) && c != '<' && c != '>' && c != '&' && c != '\\' && c != '\"' && c != '\'') {
+            if (offset + 2 >= buf_len)
+                break;  /* full: stop here instead of skipping bytes */
+            buf[offset++] = (char)c;
+        } else {
+            if (offset + 7 >= buf_len)
+                break;
+            buf[offset++] = '\\';
+            buf[offset++] = 'u';
+            buf[offset++] = '0';
+            buf[offset++] = '0';
+            buf[offset++] = hex[c >> 4];
+            buf[offset++] = hex[c & 0xF];
+        }
+    }
+    buf[offset] = '\0';
+    return buf;
+}
+
+/******************************************************************************
+ * Writes one banner record as a single line of JSON, for example
+ * (wrapped here for readability; the real record contains no line breaks):
+ *   {"ip":"10.1.10.5","timestamp":"1508379905","ports":[{"port":80,
+ *    "proto":"tcp","service":{"name":"...","banner":"..."}}]}
+ *
+ *   out        not used
+ *   fp         stream the record is written to
+ *   timestamp  cast to int and written as a quoted string
+ *   ip         IPv4 address, printed as a dotted quad (high byte first)
+ *   ip_proto   turned into the "proto" text by name_from_ip_proto()
+ *   port       written as a bare number
+ *   proto      turned into the service "name" by masscan_app_to_string()
+ *   ttl        not used
+ *   px/length  raw banner bytes, escaped by normalize_ndjson_string()
+ *
+ * A banner whose escaped form does not fit the 64 KiB scratch buffer is
+ * shortened to fit by normalize_ndjson_string().
+ ******************************************************************************/
+static void
+ndjson_out_banner(struct Output *out, FILE *fp, time_t timestamp,
+                 unsigned ip, unsigned ip_proto, unsigned port,
+                 enum ApplicationProtocol proto,
+                 unsigned ttl,
+                 const unsigned char *px, unsigned length)
+{
+    char escaped[65536];    /* holds the escaped copy of the banner */
+    const char *banner_text;
+
+    UNUSEDPARM(out);
+    UNUSEDPARM(ttl);
+
+    /* Escape first, so the text can be dropped straight into the record. */
+    banner_text = normalize_ndjson_string(px, length, escaped, sizeof(escaped));
+
+    /* Emit the complete record, trailing newline included, in one call. */
+    fprintf(fp,
+            "{\"ip\":\"%u.%u.%u.%u\","
+            "\"timestamp\":\"%d\",\"ports\":[{\"port\":%u,\"proto\":\"%s\","
+            "\"service\":{\"name\":\"%s\",\"banner\":\"%s\"}}]}\n",
+            (ip >> 24) & 0xFF,
+            (ip >> 16) & 0xFF,
+            (ip >>  8) & 0xFF,
+            (ip >>  0) & 0xFF,
+            (int)timestamp,
+            port,
+            name_from_ip_proto(ip_proto),
+            masscan_app_to_string(proto),
+            banner_text);
+}
+
+/* Callback table for the "ndjson" format. output_create() points out->funcs
+ * at this table in its Output_NDJSON case. */
+const struct OutputType ndjson_output = {
+    "ndjson",           /* presumably the format name / file extension -- confirm in output.h */
+    0,                  /* NOTE(review): member meaning not visible here -- see struct OutputType */
+    ndjson_out_open,    /* writes nothing: no file header */
+    ndjson_out_close,   /* writes nothing: no file trailer */
+    ndjson_out_status,  /* one JSON line per port status */
+    ndjson_out_banner   /* one JSON line per banner */
+};
diff --git a/src/output.c b/src/output.c
index b82c042..2b727c3 100644
--- a/src/output.c
+++ b/src/output.c
@@ -433,6 +433,9 @@ output_create(const struct Masscan *masscan, unsigned thread_index)
     case Output_JSON:
         out->funcs = &json_output;
         break;
+    case Output_NDJSON:
+        out->funcs = &ndjson_output;
+        break;
     case Output_Certs:
         out->funcs = &certs_output;
         break;
@@ -821,7 +824,7 @@ output_report_status(struct Output *out, time_t timestamp, int status,
     }
 
     /*
-     * Now do the actual output, whether it be XML, binary, JSON, Redis,
+     * Now do the actual output, whether it be XML, binary, JSON, ndjson, Redis,
      * and so on.
      */
     out->funcs->status(out, fp, timestamp, status, ip, ip_proto, port, reason, ttl);
@@ -896,7 +899,7 @@ output_report_banner(struct Output *out, time_t now,
     }
 
     /*
-     * Now do the actual output, whether it be XML, binary, JSON, Redis,
+     * Now do the actual output, whether it be XML, binary, JSON, ndjson, Redis,
      * and so on.
      */
     out->funcs->banner(out, fp, now, ip, ip_proto, port, proto, ttl, px, length);
diff --git a/src/output.h b/src/output.h
index 1777c7a..6e474ca 100644
--- a/src/output.h
+++ b/src/output.h
@@ -125,6 +125,7 @@ extern const struct OutputType text_output;
 extern const struct OutputType unicornscan_output;
 extern const struct OutputType xml_output;
 extern const struct OutputType json_output;
+extern const struct OutputType ndjson_output;
 extern const struct OutputType certs_output;
 extern const struct OutputType binary_output;
 extern const struct OutputType null_output;
diff --git a/src/smack1.c b/src/smack1.c
index d7baa74..34f72a9 100644
--- a/src/smack1.c
+++ b/src/smack1.c
@@ -115,6 +115,8 @@
 #include "pixie-timer.h"
 #if defined(_MSC_VER)
 #include <intrin.h>
+#elif defined(__llvm__)
+#include <x86intrin.h>
 #elif defined(__GNUC__)
 static __inline__ unsigned long long __rdtsc(void)
 {
-- 
GitLab