|
|
@@ -74,7 +74,7 @@ class ArticleTextParser(BaseTextParser): |
|
|
|
self.clean = clean.replace("\n\n", "\n").strip() |
|
|
|
return self.clean |
|
|
|
|
|
|
|
def chunk(self, nltk_dir, max_chunks, max_query=256): |
|
|
|
def chunk(self, nltk_dir, max_chunks, min_query=8, max_query=128): |
|
|
|
"""Convert the clean article text into a list of web-searchable chunks. |
|
|
|
|
|
|
|
    No more than *max_chunks* chunks will be returned. Each chunk will only be |
|
|
@@ -99,6 +99,8 @@ class ArticleTextParser(BaseTextParser): |
|
|
|
|
|
|
|
sentences = [] |
|
|
|
for sentence in tokenizer.tokenize(self.clean): |
|
|
|
if len(sentence) < min_query: |
|
|
|
continue |
|
|
|
if len(sentence) > max_query: |
|
|
|
words = sentence.split() |
|
|
|
while len(" ".join(words)) > max_query: |
|
|
|