Ver código fonte

Allow content parsers to signal that a source should be excluded.

tags/v0.2
Ben Kurtovic 9 anos atrás
pai
commit
81a090c923
3 arquivos alterados com 23 adições e 12 exclusões
  1. +12
    -9
      earwigbot/exceptions.py
  2. +1
    -0
      earwigbot/wiki/copyvios/parsers.py
  3. +10
    -3
      earwigbot/wiki/copyvios/workers.py

+ 12
- 9
earwigbot/exceptions.py Ver arquivo

@@ -52,6 +52,7 @@ This module contains all exceptions used by EarwigBot::
+-- UnknownSearchEngineError
+-- UnsupportedSearchEngineError
+-- SearchQueryError
+-- ParserExclusionError
"""

class EarwigBotError(Exception):
@@ -231,9 +232,7 @@ class UnknownSearchEngineError(CopyvioCheckError):
:py:attr:`config.wiki["search"]["engine"]`.

Raised by :py:meth:`Page.copyvio_check
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>` and
:py:meth:`Page.copyvio_compare
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`.
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>`.
"""

class UnsupportedSearchEngineError(CopyvioCheckError):
@@ -243,16 +242,20 @@ class UnsupportedSearchEngineError(CopyvioCheckError):
couldn't be imported.

Raised by :py:meth:`Page.copyvio_check
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>` and
:py:meth:`Page.copyvio_compare
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`.
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>`.
"""

# NOTE(review): the "Raised by" paragraph below carries both a
# copyvio_check/copyvio_compare pair and a lone trailing copyvio_check target;
# these look like the old and new sides of a diff hunk -- confirm which applies.
class SearchQueryError(CopyvioCheckError):
    """Some error occurred while doing a search query.

    Raised by :py:meth:`Page.copyvio_check
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>` and
    :py:meth:`Page.copyvio_compare
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`.
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>`.
    """

class ParserExclusionError(CopyvioCheckError):
    """A content parser detected that the given source should be excluded.

    Raised internally by :py:meth:`Page.copyvio_check
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>`; should not be
    exposed in client code.

    The copyvio worker catches this exception around its URL fetch: it marks
    the source as both skipped and excluded and finishes work on it, instead
    of building a Markov chain and comparing it.
    """

+ 1
- 0
earwigbot/wiki/copyvios/parsers.py Ver arquivo

@@ -27,6 +27,7 @@ from StringIO import StringIO
import mwparserfromhell

from earwigbot import importer
from earwigbot.exceptions import ParserExclusionError

bs4 = importer.new("bs4")
nltk = importer.new("nltk")


+ 10
- 3
earwigbot/wiki/copyvios/workers.py Ver arquivo

@@ -34,6 +34,7 @@ from time import time
from urllib2 import build_opener, URLError

from earwigbot import importer
from earwigbot.exceptions import ParserExclusionError
from earwigbot.wiki.copyvios.markov import MarkovChain, MarkovChainIntersection
from earwigbot.wiki.copyvios.parsers import get_parser
from earwigbot.wiki.copyvios.result import CopyvioCheckResult, CopyvioSource
@@ -218,9 +219,15 @@ class _CopyvioWorker(object):
except StopIteration:
self._logger.debug("Exiting: got stop signal")
return
text = self._open_url(source)
chain = MarkovChain(text) if text else None
source.workspace.compare(source, chain)

try:
text = self._open_url(source)
except ParserExclusionError:
source.skipped = source.excluded = True
source.finish_work()
else:
chain = MarkovChain(text) if text else None
source.workspace.compare(source, chain)

def start(self):
"""Start the copyvio worker in a new thread."""


Carregando…
Cancelar
Salvar