Loading Makefile +5 −16 Original line number Diff line number Diff line Loading @@ -6,28 +6,17 @@ CC = gcc CFLAGS = -g $(INCLUDES) $(DEFINES) -Wall -Wstrict-aliasing=2 -O3 -rdynamic .SUFFIXES: .c .cpp tmp/%.o: src/%.c tmp tmp/%.o: src/%.c $(CC) $(CFLAGS) -c $< -o $@ SRC = $(wildcard src/*.c) OBJ = $(addprefix tmp/, $(notdir $(addsuffix .o, $(basename $(SRC))))) bin/masscan: $(OBJ) bin bin/masscan: $(OBJ) $(CC) $(CFLAGS) -o $@ $(OBJ) -lm $(LIBS) -lstdc++ bin: mkdir bin tmp: mkdir tmp depend: makedepend $(CFLAGS) -Y $(SRC) clean: rm -f $(OBJ) all: bin/masscan rm tmp/*.o rm bin/masscan default: bin/masscan README.md +50 −41 Original line number Diff line number Diff line # MASSCAN: Mass IPv4 port scanner This port scanner with the following features: This is a port scanner with the following features: * very large ranges (like the entire Internet or 10.x.x.x) * very fast (millions of packets/second) * randomization of port/IP combo Loading @@ -11,10 +11,6 @@ This port scanner has the following limitations: * only tests if port is open/closed, no banner checking * only 'raw' packet support # Status Only compiles on Windows at the moment, but it's generic ANSI C and libpcap, so only minor changes are needed to make it work on Linux. # Building Loading @@ -28,12 +24,12 @@ Then type make, there is no configuration step. On Windows, use the VisualStudio 2010 project. # Regression testing The project contains a built-in self-test using the '-T' option. Run it like the following: The project contains a built-in self-test: $ masscan -T $ masscan --selftest selftest: success! If the self-test succeeds, you'll get a simple success message, and the Loading @@ -46,61 +42,74 @@ It's just testing the invidual units within the program. I plan to create an online test, where a second program listens on the network to verify that what's transmitted is the same thing that was specified to be sent. 
# Usage An example usage is the following: $ masscan -p80,8000-8100 10.0.0.0/8 --rate=1000 --ignore=killist.txt $ masscan -i eth0 -p80,8000-8100 10.0.0.0/8 -c settings.conf This will: * scan the 10.x.x.x subnet, all 16 million addresses * transmits at a rate of 1000 packets/second * scans port 80 and the range 8000 to 8100, or 102 ports total * ignores any address ranges in the file killist.txt # How it works ## Setting router MAC address (IMPORTANT!!) Using a custom network driver (PF_RING, DPDK), a low-end computer can transmit packets at a rate of 15-million packets/second. This means we can scan the entire Internet for port 80 in under five minutes. You need to set the destination router's MAC address. I haven't added the code to figure this out yet. This is done by putting it in the configuration file: This assumes trivial overhead for generating packets. That's the purpose of this program: to do the least amount of processing per packet possible. We start with a 15-mpps packet generator, and then work backwards to figure out the minimal logic to create those packets. router-mac = 00:11:22:33:44:55 This also assumes that packets don't get dropped on reception. If we attempt to send 15-mpps at a target subnet, most will get dropped. In addition, this will annoy the target. While we can send packets at that rate, we need to make sure nobody receives them at that rate. or on the command line We solve this problem by randomizing the order in which we send packets. Assuming we are scanning ALL ports and ALL IPv4 addresses, this means that each packet we send will have a completely random IPv4 address and port number. $ masscan --router-mac=00:11:22:33:44:55 One way to randomize is to keep track of "state", consisting of a table of things we have yet to transmit. This is messy. It would consume a huge amount of memory, and be slow as each packet caused one or more cache misses. ## Transmit rate (IMPORTANT!!) 
A better way is to first assign each packet a sequence number, then use an algorithm that creates a 1-to-1 translation to a new sequence. In other words: seqno = translate(seqno); We need to look for a mathematical algorithm that has this 1-to-1 property. This program spews out packets very fast. Even in a virtual machine through a virtualized network layer, it can transmit 200,000 packets per second. This will overload a lot of networks. By default, the program attempts to throttle transmission, but this code is broken at the moment. The LCG algorithm fits this property. Given an input sequence of numbers, such as 1 through 10, it'll spit them out in random order, without keeping state. (LCG stands for "linear-congruential-generator"). One problem with the LCG is that it needs the right constants. To do that requires hunting for primes. So one of the major complications is the code that calculates them on the fly. For a very large range, such as scanning the entire Internet, it'll take a while to do the calculation. # How it works Here are some notes on the design. ## Spews out packets asynchronously This is an **asynchronous** program. That means it has a single thread that spews out packets indiscriminately without waiting for responses. Another thread collects the responses. This has lots of subtle consequences. For example, you can't use this program to scan the local subnet, because it can't ARP targets and wait for responses -- that's synchronous thinking. ## Randomization Packets are sent in a random order, randomizing simultaneously the IPv4 address and the port. In other words, if you are scanning the entire Internet at a very fast rate, somebody owning a Class C network will see a very slow rate of packets. 
The way we do this randomization is that we assign every IP/port combo a sequence number, then use a function that looks like: seqno = translate(seqno); The `translate()` function uses some quirky math, based on the LCG PRNG (the basic random number generator we are all familiar with) to do this translation. The key property here is that we can completely randomize the order withou keeping any state in memory. In other words, scanning the entire Internet for all ports is a 48-bit problem (32-bit address and 16-bit port), but we accomplish this with only a few kilobytes of memory. bin/.gitignore 0 → 100644 +6 −0 Original line number Diff line number Diff line masscan masscan.exe masscan.ilk masscan.pdb src/main-conf.c +15 −16 Original line number Diff line number Diff line Loading @@ -81,19 +81,6 @@ masscan_echo(struct Masscan *masscan) printf("rate = %10.2f\n", masscan->max_rate); printf("adapter = %s\n", masscan->ifname); printf("adapter.ip = %u.%u.%u.%u\n", (masscan->adapter_ip>>24)&0xFF, (masscan->adapter_ip>>16)&0xFF, (masscan->adapter_ip>> 8)&0xFF, (masscan->adapter_ip>> 0)&0xFF ); printf("adapter.mac = %02x:%02x:%02x:%02x:%02x:%02x\n", masscan->adapter_mac[0], masscan->adapter_mac[1], masscan->adapter_mac[2], masscan->adapter_mac[3], masscan->adapter_mac[4], masscan->adapter_mac[5]); printf("router.mac = %02x:%02x:%02x:%02x:%02x:%02x\n", masscan->router_mac[0], masscan->router_mac[1], Loading Loading @@ -280,7 +267,7 @@ int parse_mac_address(const char *text, unsigned char *mac) x |= hexval(c); text++; mac[i] = x; mac[i] = (unsigned char)x; if (ispunct(*text & 0xFF)) text++; Loading Loading @@ -443,6 +430,10 @@ masscan_set_parameter(struct Masscan *masscan, const char *name, const char *val else if (EQUALS("echo", name)) { masscan_echo(masscan); exit(1); } else if (EQUALS("selftest", name) || EQUALS("self-test", name)) { masscan->op = Operation_Selftest; return; } else { fprintf(stderr, "CONF: unknown config option: %s=%s\n", name, value); } Loading @@ -451,8 
+442,9 @@ masscan_set_parameter(struct Masscan *masscan, const char *name, const char *val static int is_singleton(const char *name) { if (EQUALS("echo", name)) return 1; if (EQUALS("echo", name)) return 1; if (EQUALS("selftest", name)) return 1; if (EQUALS("self-test", name)) return 1; return 0; } Loading Loading @@ -518,6 +510,13 @@ masscan_command_line(struct Masscan *masscan, int argc, char *argv[]) const char *arg; switch (argv[i][1]) { case 'c': if (argv[i][2]) arg = argv[i]+2; else arg = argv[++i]; masscan_read_config_file(masscan, arg); break; case 'v': { Loading src/main-throttle.c +2 −2 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ throttler_next_batch(struct Throttler *throttler, uint64_t count) double new_rate; if (packets_sent < 1.01) return throttler->max_batch; return (uint64_t)throttler->max_batch; /* BOUNDARY CASE: if the elapsed time is zero, or very small, we * get confused. Therefore, handle this case specially */ Loading Loading @@ -72,7 +72,7 @@ throttler_next_batch(struct Throttler *throttler, uint64_t count) } /* wait a bit */ x = waittime * 1000000.0; x = (unsigned)waittime * 1000000.0; x1 = port_gettime(); port_usleep(x); Loading Loading
Makefile +5 −16 Original line number Diff line number Diff line Loading @@ -6,28 +6,17 @@ CC = gcc CFLAGS = -g $(INCLUDES) $(DEFINES) -Wall -Wstrict-aliasing=2 -O3 -rdynamic .SUFFIXES: .c .cpp tmp/%.o: src/%.c tmp tmp/%.o: src/%.c $(CC) $(CFLAGS) -c $< -o $@ SRC = $(wildcard src/*.c) OBJ = $(addprefix tmp/, $(notdir $(addsuffix .o, $(basename $(SRC))))) bin/masscan: $(OBJ) bin bin/masscan: $(OBJ) $(CC) $(CFLAGS) -o $@ $(OBJ) -lm $(LIBS) -lstdc++ bin: mkdir bin tmp: mkdir tmp depend: makedepend $(CFLAGS) -Y $(SRC) clean: rm -f $(OBJ) all: bin/masscan rm tmp/*.o rm bin/masscan default: bin/masscan
README.md +50 −41 Original line number Diff line number Diff line # MASSCAN: Mass IPv4 port scanner This port scanner with the following features: This is a port scanner with the following features: * very large ranges (like the entire Internet or 10.x.x.x) * very fast (millions of packets/second) * randomization of port/IP combo Loading @@ -11,10 +11,6 @@ This port scanner has the following limitations: * only tests if port is open/closed, no banner checking * only 'raw' packet support # Status Only compiles on Windows at the moment, but it's generic ANSI C and libpcap, so only minor changes are needed to make it work on Linux. # Building Loading @@ -28,12 +24,12 @@ Then type make, there is no configuration step. On Windows, use the VisualStudio 2010 project. # Regression testing The project contains a built-in self-test using the '-T' option. Run it like the following: The project contains a built-in self-test: $ masscan -T $ masscan --selftest selftest: success! If the self-test succeeds, you'll get a simple success message, and the Loading @@ -46,61 +42,74 @@ It's just testing the individual units within the program. I plan to create an online test, where a second program listens on the network to verify that what's transmitted is the same thing that was specified to be sent. # Usage An example usage is the following: $ masscan -p80,8000-8100 10.0.0.0/8 --rate=1000 --ignore=killist.txt $ masscan -i eth0 -p80,8000-8100 10.0.0.0/8 -c settings.conf This will: * scan the 10.x.x.x subnet, all 16 million addresses * transmits at a rate of 1000 packets/second * scans port 80 and the range 8000 to 8100, or 102 ports total * ignores any address ranges in the file killist.txt # How it works ## Setting router MAC address (IMPORTANT!!) Using a custom network driver (PF_RING, DPDK), a low-end computer can transmit packets at a rate of 15-million packets/second. This means we can scan the entire Internet for port 80 in under five minutes. 
You need to set the destination router's MAC address. I haven't added the code to figure this out yet. This is done by putting it in the configuration file: This assumes trivial overhead for generating packets. That's the purpose of this program: to do the least amount of processing per packet possible. We start with a 15-mpps packet generator, and then work backwards to figure out the minimal logic to create those packets. router-mac = 00:11:22:33:44:55 This also assumes that packets don't get dropped on reception. If we attempt to send 15-mpps at a target subnet, most will get dropped. In addition, this will annoy the target. While we can send packets at that rate, we need to make sure nobody receives them at that rate. or on the command line We solve this problem by randomizing the order in which we send packets. Assuming we are scanning ALL ports and ALL IPv4 addresses, this means that each packet we send will have a completely random IPv4 address and port number. $ masscan --router-mac=00:11:22:33:44:55 One way to randomize is to keep track of "state", consisting of a table of things we have yet to transmit. This is messy. It would consume a huge amount of memory, and be slow as each packet caused one or more cache misses. ## Transmit rate (IMPORTANT!!) A better way is to first assign each packet a sequence number, then use an algorithm that creates a 1-to-1 translation to a new sequence. In other words: seqno = translate(seqno); We need to look for a mathematical algorithm that has this 1-to-1 property. This program spews out packets very fast. Even in a virtual machine through a virtualized network layer, it can transmit 200,000 packets per second. This will overload a lot of networks. By default, the program attempts to throttle transmission, but this code is broken at the moment. The LCG algorithm fits this property. Given an input sequence of numbers, such as 1 through 10, it'll spit them out in random order, without keeping state. 
(LCG stands for "linear-congruential-generator"). One problem with the LCG is that it needs the right constants. To do that requires hunting for primes. So one of the major complications is the code that calculates them on the fly. For a very large range, such as scanning the entire Internet, it'll take a while to do the calculation. # How it works Here are some notes on the design. ## Spews out packets asynchronously This is an **asynchronous** program. That means it has a single thread that spews out packets indiscriminately without waiting for responses. Another thread collects the responses. This has lots of subtle consequences. For example, you can't use this program to scan the local subnet, because it can't ARP targets and wait for responses -- that's synchronous thinking. ## Randomization Packets are sent in a random order, randomizing simultaneously the IPv4 address and the port. In other words, if you are scanning the entire Internet at a very fast rate, somebody owning a Class C network will see a very slow rate of packets. The way we do this randomization is that we assign every IP/port combo a sequence number, then use a function that looks like: seqno = translate(seqno); The `translate()` function uses some quirky math, based on the LCG PRNG (the basic random number generator we are all familiar with) to do this translation. The key property here is that we can completely randomize the order without keeping any state in memory. In other words, scanning the entire Internet for all ports is a 48-bit problem (32-bit address and 16-bit port), but we accomplish this with only a few kilobytes of memory.
bin/.gitignore 0 → 100644 +6 −0 Original line number Diff line number Diff line masscan masscan.exe masscan.ilk masscan.pdb
src/main-conf.c +15 −16 Original line number Diff line number Diff line Loading @@ -81,19 +81,6 @@ masscan_echo(struct Masscan *masscan) printf("rate = %10.2f\n", masscan->max_rate); printf("adapter = %s\n", masscan->ifname); printf("adapter.ip = %u.%u.%u.%u\n", (masscan->adapter_ip>>24)&0xFF, (masscan->adapter_ip>>16)&0xFF, (masscan->adapter_ip>> 8)&0xFF, (masscan->adapter_ip>> 0)&0xFF ); printf("adapter.mac = %02x:%02x:%02x:%02x:%02x:%02x\n", masscan->adapter_mac[0], masscan->adapter_mac[1], masscan->adapter_mac[2], masscan->adapter_mac[3], masscan->adapter_mac[4], masscan->adapter_mac[5]); printf("router.mac = %02x:%02x:%02x:%02x:%02x:%02x\n", masscan->router_mac[0], masscan->router_mac[1], Loading Loading @@ -280,7 +267,7 @@ int parse_mac_address(const char *text, unsigned char *mac) x |= hexval(c); text++; mac[i] = x; mac[i] = (unsigned char)x; if (ispunct(*text & 0xFF)) text++; Loading Loading @@ -443,6 +430,10 @@ masscan_set_parameter(struct Masscan *masscan, const char *name, const char *val else if (EQUALS("echo", name)) { masscan_echo(masscan); exit(1); } else if (EQUALS("selftest", name) || EQUALS("self-test", name)) { masscan->op = Operation_Selftest; return; } else { fprintf(stderr, "CONF: unknown config option: %s=%s\n", name, value); } Loading @@ -451,8 +442,9 @@ masscan_set_parameter(struct Masscan *masscan, const char *name, const char *val static int is_singleton(const char *name) { if (EQUALS("echo", name)) return 1; if (EQUALS("echo", name)) return 1; if (EQUALS("selftest", name)) return 1; if (EQUALS("self-test", name)) return 1; return 0; } Loading Loading @@ -518,6 +510,13 @@ masscan_command_line(struct Masscan *masscan, int argc, char *argv[]) const char *arg; switch (argv[i][1]) { case 'c': if (argv[i][2]) arg = argv[i]+2; else arg = argv[++i]; masscan_read_config_file(masscan, arg); break; case 'v': { Loading
src/main-throttle.c +2 −2 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ throttler_next_batch(struct Throttler *throttler, uint64_t count) double new_rate; if (packets_sent < 1.01) return throttler->max_batch; return (uint64_t)throttler->max_batch; /* BOUNDARY CASE: if the elapsed time is zero, or very small, we * get confused. Therefore, handle this case specially */ Loading Loading @@ -72,7 +72,7 @@ throttler_next_batch(struct Throttler *throttler, uint64_t count) } /* wait a bit */ x = waittime * 1000000.0; x = (unsigned)waittime * 1000000.0; x1 = port_gettime(); port_usleep(x); Loading