A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

180 lines
7.0 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2009-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from collections import namedtuple
  23. import re
  24. import socket
  25. from socket import AF_INET, AF_INET6
  26. from earwigbot.commands import Command
  27. _IP = namedtuple("_IP", ["family", "ip", "size"])
  28. _Range = namedtuple("_Range", [
  29. "family", "range", "low", "high", "size", "addresses"])
  30. class CIDR(Command):
  31. """Calculates the smallest CIDR range that encompasses a list of IP
  32. addresses. Used to make range blocks."""
  33. name = "cidr"
  34. commands = ["cidr", "range", "rangeblock", "rangecalc", "blockcalc",
  35. "iprange", "cdir"]
  36. # https://www.mediawiki.org/wiki/Manual:$wgBlockCIDRLimit
  37. LIMIT_IPv4 = 16
  38. LIMIT_IPv6 = 19
  39. def process(self, data):
  40. if not data.args:
  41. msg = ("Specify a list of IP addresses to calculate a CIDR range "
  42. "for. For example, \x0306!{0} 192.168.0.3 192.168.0.15 "
  43. "192.168.1.4\x0F or \x0306!{0} 2500:1:2:3:: "
  44. "2500:1:2:3:dead:beef::\x0F.")
  45. self.reply(data, msg.format(data.command))
  46. return
  47. try:
  48. ips = [self._parse_ip(arg) for arg in data.args]
  49. except ValueError as exc:
  50. msg = "Can't parse IP address \x0302{0}\x0F."
  51. self.reply(data, msg.format(exc.message))
  52. return
  53. if any(ip.family == AF_INET for ip in ips) and any(
  54. ip.family == AF_INET6 for ip in ips):
  55. msg = "Can't calculate a range for both IPv4 and IPv6 addresses."
  56. self.reply(data, msg)
  57. return
  58. cidr = self._calculate_range(ips[0].family, ips)
  59. descr = self._describe(cidr.family, cidr.size)
  60. msg = ("Smallest CIDR range is \x02{0}\x0F, covering {1} from "
  61. "\x0305{2}\x0F to \x0305{3}\x0F{4}.")
  62. self.reply(data, msg.format(
  63. cidr.range, cidr.addresses, cidr.low, cidr.high,
  64. " (\x0304{0}\x0F)".format(descr) if descr else ""))
  65. def _parse_ip(self, arg):
  66. """Converts an argument into an IP address object."""
  67. arg = self._parse_arg(arg)
  68. oldarg = arg
  69. size = None
  70. if "/" in arg:
  71. arg, size = arg.split("/", 1)
  72. try:
  73. size = int(size, 10)
  74. except ValueError:
  75. raise ValueError(oldarg)
  76. if size < 0 or size > 128:
  77. raise ValueError(oldarg)
  78. try:
  79. ip = _IP(AF_INET, socket.inet_pton(AF_INET, arg), size)
  80. except socket.error:
  81. try:
  82. return _IP(AF_INET6, socket.inet_pton(AF_INET6, arg), size)
  83. except socket.error:
  84. raise ValueError(oldarg)
  85. if size > 32:
  86. raise ValueError(oldarg)
  87. return ip
  88. def _parse_arg(self, arg):
  89. """Converts an argument into an IP address string."""
  90. if "[[" in arg and "]]" in arg:
  91. regex = r"\[\[\s*(?:User(?:\stalk)?:)?(.*?)(?:\|.*?)?\s*\]\]"
  92. match = re.search(regex, arg, re.I)
  93. if not match:
  94. raise ValueError(arg)
  95. arg = match.group(1)
  96. if re.match(r"https?://", arg):
  97. if "target=" in arg:
  98. regex = r"target=(.*?)(?:&|$)"
  99. elif "page=" in arg:
  100. regex = r"page=(?:User(?:(?:\s|_)talk)?(?::|%3A))?(.*?)(?:&|$)"
  101. elif re.search(r"Special(:|%3A)Contributions/", arg, re.I):
  102. regex = r"Special(?:\:|%3A)Contributions/(.*?)(?:\&|\?|$)"
  103. elif re.search(r"User((\s|_)talk)?(:|%3A)", arg, re.I):
  104. regex = r"User(?:(?:\s|_)talk)?(?:\:|%3A)(.*?)(?:\&|\?|$)"
  105. else:
  106. raise ValueError(arg)
  107. match = re.search(regex, arg, re.I)
  108. if not match:
  109. raise ValueError(arg)
  110. arg = match.group(1)
  111. return arg
  112. def _calculate_range(self, family, ips):
  113. """Calculate the smallest CIDR range encompassing a list of IPs."""
  114. bin_ips = ["".join(
  115. bin(ord(octet))[2:].zfill(8) for octet in ip.ip) for ip in ips]
  116. for i, ip in enumerate(ips):
  117. if ip.size is not None:
  118. suffix = "X" * (len(bin_ips[i]) - ip.size)
  119. bin_ips[i] = bin_ips[i][:ip.size] + suffix
  120. size = len(bin_ips[0])
  121. for i in range(len(bin_ips[0])):
  122. if any(ip[i] == "X" for ip in bin_ips) or (
  123. any(ip[i] == "0" for ip in bin_ips) and
  124. any(ip[i] == "1" for ip in bin_ips)):
  125. size = i
  126. break
  127. bin_low = bin_ips[0][:size].ljust(len(bin_ips[0]), "0")
  128. bin_high = bin_ips[0][:size].ljust(len(bin_ips[0]), "1")
  129. low = self._format_bin(family, bin_low)
  130. high = self._format_bin(family, bin_high)
  131. return _Range(
  132. family, low + "/" + str(size), low, high, size,
  133. self._format_count(2 ** (len(bin_ips[0]) - size)))
  134. @staticmethod
  135. def _format_bin(family, binary):
  136. """Convert an IP's binary representation to presentation format."""
  137. return socket.inet_ntop(family, "".join(
  138. chr(int(binary[i:i + 8], 2)) for i in range(0, len(binary), 8)))
  139. @staticmethod
  140. def _format_count(count):
  141. """Nicely format a number of addresses affected by a range block."""
  142. if count == 1:
  143. return "1 address"
  144. if count > 2 ** 32:
  145. base = "{0:.2E} addresses".format(count)
  146. if count == 2 ** 64:
  147. return base + " (1 /64 subnet)"
  148. if count > 2 ** 96:
  149. return base + " ({0:.2E} /64 subnets)".format(count >> 64)
  150. if count > 2 ** 63:
  151. return base + " ({0:,} /64 subnets)".format(count >> 64)
  152. return base
  153. return "{0:,} addresses".format(count)
  154. def _describe(self, family, size):
  155. """Return an optional English description of a range."""
  156. if (family == AF_INET and size < self.LIMIT_IPv4) or (
  157. family == AF_INET6 and size < self.LIMIT_IPv6):
  158. return "too large to block"