|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185 |
- /*
- Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
-
- Permission is hereby granted, free of charge, to any person obtaining a copy of
- this software and associated documentation files (the "Software"), to deal in
- the Software without restriction, including without limitation the rights to
- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- of the Software, and to permit persons to whom the Software is furnished to do
- so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
- */
-
- #include "definitions.h"
-
- /*
- This file should be kept up to date with mwparserfromhell/definitions.py.
- See the Python version for data sources.
- */
-
/*
    NULL-terminated table of recognized URI schemes, all lowercase.
    is_scheme() searches this table when its `slashes` argument is nonzero.
*/
static const char* URI_SCHEMES[] = {
    "bitcoin", "ftp", "ftps", "geo", "git", "gopher", "http", "https", "irc",
    "ircs", "magnet", "mailto", "mms", "news", "nntp", "redis", "sftp", "sip",
    "sips", "sms", "ssh", "svn", "tel", "telnet", "urn", "worldwind", "xmpp",
    NULL
};
-
/*
    NULL-terminated table of lowercase URI schemes searched by is_scheme()
    when its `slashes` argument is zero. Every entry also appears in
    URI_SCHEMES.
*/
static const char* URI_SCHEMES_AUTHORITY_OPTIONAL[] = {
    "bitcoin", "geo", "magnet", "mailto", "news", "sip", "sips", "sms", "tel",
    "urn", "xmpp",
    NULL
};
-
/*
    NULL-terminated table of lowercase tag names whose contents are not handed
    to the parser; is_parsable() returns zero for any tag listed here.
*/
static const char* PARSER_BLACKLIST[] = {
    "categorytree", "ce", "chem", "gallery", "graph", "hiero", "imagemap",
    "inputbox", "math", "nowiki", "pre", "score", "section", "source",
    "syntaxhighlight", "templatedata", "timeline",
    NULL
};
-
/*
    NULL-terminated table of lowercase tag names that can exist without a
    close tag; searched by is_single().
*/
static const char* SINGLE[] = {
    "br",
    "wbr",
    "hr",
    "meta",
    "link",
    "img",
    "li",
    "dt",
    "dd",
    "th",
    "td",
    "tr",
    NULL,
};
-
/*
    NULL-terminated table of lowercase tag names that must exist without a
    close tag; searched by is_single_only().
*/
static const char* SINGLE_ONLY[] = {
    "br",
    "wbr",
    "hr",
    "meta",
    "link",
    "img",
    NULL,
};
-
- /*
- Convert a PyUnicodeObject to a lowercase ASCII char* array and store it in
- the second argument. The caller must free the return value when finished.
- If the return value is NULL, the conversion failed and *string is not set.
- */
- static PyObject* unicode_to_lcase_ascii(PyObject *input, const char **string)
- {
- PyObject *lower = PyObject_CallMethod(input, "lower", NULL), *bytes;
-
- if (!lower)
- return NULL;
- bytes = PyUnicode_AsASCIIString(lower);
- Py_DECREF(lower);
- if (!bytes) {
- if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
- PyErr_Clear();
- return NULL;
- }
- *string = PyBytes_AS_STRING(bytes);
- return bytes;
- }
-
- /*
- Return whether a PyUnicodeObject is in a list of lowercase ASCII strings.
- */
- static int unicode_in_string_list(PyObject *input, const char **list)
- {
- const char *string;
- PyObject *temp = unicode_to_lcase_ascii(input, &string);
- int retval = 0;
-
- if (!temp)
- return 0;
-
- while (*list) {
- if (!strcmp(*(list++), string)) {
- retval = 1;
- goto end;
- }
- }
-
- end:
- Py_DECREF(temp);
- return retval;
- }
-
- /*
- Return if the given tag's contents should be passed to the parser.
- */
- int is_parsable(PyObject *tag)
- {
- return !unicode_in_string_list(tag, PARSER_BLACKLIST);
- }
-
- /*
- Return whether or not the given tag can exist without a close tag.
- */
- int is_single(PyObject *tag)
- {
- return unicode_in_string_list(tag, SINGLE);
- }
-
- /*
- Return whether or not the given tag must exist without a close tag.
- */
- int is_single_only(PyObject *tag)
- {
- return unicode_in_string_list(tag, SINGLE_ONLY);
- }
-
- /*
- Return whether the given scheme is valid for external links.
- */
- int is_scheme(PyObject *scheme, int slashes)
- {
- if (slashes)
- return unicode_in_string_list(scheme, URI_SCHEMES);
- else
- return unicode_in_string_list(scheme, URI_SCHEMES_AUTHORITY_OPTIONAL);
- }
|