A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

html_entities.mwtest 3.8 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. name: named
  2. label: a basic named HTML entity
  3. input: "&nbsp;"
  4. output: [HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd()]
  5. ---
  6. name: numeric_decimal
  7. label: a basic decimal HTML entity
  8. input: "&#107;"
  9. output: [HTMLEntityStart(), HTMLEntityNumeric(), Text(text="107"), HTMLEntityEnd()]
  10. ---
  11. name: numeric_hexadecimal_x
  12. label: a basic hexadecimal HTML entity, using 'x' as a signal
  13. input: "&#x6B;"
  14. output: [HTMLEntityStart(), HTMLEntityNumeric(), HTMLEntityHex(char="x"), Text(text="6B"), HTMLEntityEnd()]
  15. ---
  16. name: numeric_hexadecimal_X
  17. label: a basic hexadecimal HTML entity, using 'X' as a signal
  18. input: "&#X6B;"
  19. output: [HTMLEntityStart(), HTMLEntityNumeric(), HTMLEntityHex(char="X"), Text(text="6B"), HTMLEntityEnd()]
  20. ---
  21. name: numeric_decimal_max
  22. label: the maximum acceptable decimal numeric entity
  23. input: "&#1114111;"
  24. output: [HTMLEntityStart(), HTMLEntityNumeric(), Text(text="1114111"), HTMLEntityEnd()]
  25. ---
  26. name: numeric_hex_max
  27. label: the maximum acceptable hexadecimal numeric entity
  28. input: "&#x10FFFF;"
  29. output: [HTMLEntityStart(), HTMLEntityNumeric(), HTMLEntityHex(char="x"), Text(text="10FFFF"), HTMLEntityEnd()]
  30. ---
  31. name: numeric_zeros
  32. label: zeros accepted at the beginning of a numeric entity
  33. input: "&#0000000107;"
  34. output: [HTMLEntityStart(), HTMLEntityNumeric(), Text(text="0000000107"), HTMLEntityEnd()]
  35. ---
  36. name: numeric_hex_zeros
  37. label: zeros accepted at the beginning of a hex numeric entity
  38. input: "&#x0000000107;"
  39. output: [HTMLEntityStart(), HTMLEntityNumeric(), HTMLEntityHex(char="x"), Text(text="0000000107"), HTMLEntityEnd()]
  40. ---
  41. name: invalid_named_too_long
  42. label: a named entity that is too long
  43. input: "&sigmaSigma;"
  44. output: [Text(text="&sigmaSigma;")]
  45. ---
  46. name: invalid_named_undefined
  47. label: a named entity that doesn't exist
  48. input: "&foobar;"
  49. output: [Text(text="&foobar;")]
  50. ---
  51. name: invalid_named_nonascii
  52. label: a named entity with non-ASCII characters
  53. input: "&sígma;"
  54. output: [Text(text="&sígma;")]
  55. ---
  56. name: invalid_numeric_out_of_range_1
  57. label: a numeric entity that is out of range: < 1
  58. input: "&#0;"
  59. output: [Text(text="&#0;")]
  60. ---
  61. name: invalid_numeric_out_of_range_2
  62. label: a hex numeric entity that is out of range: < 1
  63. input: "&#x0;"
  64. output: [Text(text="&#x0;")]
  65. ---
  66. name: invalid_numeric_out_of_range_3
  67. label: a numeric entity that is out of range: > 0x10FFFF
  68. input: "&#1114112;"
  69. output: [Text(text="&#1114112;")]
  70. ---
  71. name: invalid_numeric_out_of_range_4
  72. label: a hex numeric entity that is out of range: > 0x10FFFF
  73. input: "&#x0110000;"
  74. output: [Text(text="&#x0110000;")]
  75. ---
  76. name: invalid_partial_amp
  77. label: invalid entities: just an ampersand
  78. input: "&"
  79. output: [Text(text="&")]
  80. ---
  81. name: invalid_partial_amp_semicolon
  82. label: invalid entities: an ampersand and semicolon
  83. input: "&;"
  84. output: [Text(text="&;")]
  85. ---
  86. name: invalid_partial_amp_pound
  87. label: invalid entities: just an ampersand, pound sign
  88. input: "&#"
  89. output: [Text(text="&#")]
  90. ---
  91. name: invalid_partial_amp_pound_x
  92. label: invalid entities: just an ampersand, pound sign, x
  93. input: "&#x"
  94. output: [Text(text="&#x")]
  95. ---
  96. name: invalid_partial_amp_pound_semicolon
  97. label: invalid entities: an ampersand, pound sign, and semicolon
  98. input: "&#;"
  99. output: [Text(text="&#;")]
  100. ---
  101. name: invalid_partial_amp_pound_x_semicolon
  102. label: invalid entities: an ampersand, pound sign, x, and semicolon
  103. input: "&#x;"
  104. output: [Text(text="&#x;")]
  105. ---
  106. name: invalid_partial_amp_pound_numbers
  107. label: invalid entities: an ampersand, pound sign, numbers
  108. input: "&#123"
  109. output: [Text(text="&#123")]
  110. ---
  111. name: invalid_partial_amp_pound_x_2
  112. label: invalid entities: an ampersand, pound sign, and x
  113. input: "&#x"
  114. output: [Text(text="&#x")]
  115. ---
  116. name: invalid_zeros_before_named
  117. label: invalid entities: zeros before a valid named entity
  118. input: "&000nbsp;"
  119. output: [Text(text="&000nbsp;")]