Przeglądaj źródła

copyvios: Minor refactor for cleaner stack frames.

tags/v0.3
Ben Kurtovic 5 lat temu
rodzic
commit
466d3a42f1
2 zmienionych plików z 26 dodań i 20 usunięć
  1. +1
    -1
      earwigbot/wiki/copyvios/parsers.py
  2. +25
    -19
      earwigbot/wiki/copyvios/workers.py

+ 1
- 1
earwigbot/wiki/copyvios/parsers.py Wyświetl plik

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal


+ 25
- 19
earwigbot/wiki/copyvios/workers.py Wyświetl plik

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright (C) 2009-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@@ -203,6 +203,28 @@ class _CopyvioWorker(object):
self._queues.lock.release() self._queues.lock.release()
return source return source


def _handle_once(self):
    """Process one source taken from the queues.

    Returns False when no source could be dequeued (the queue timed out or
    a stop signal was received), telling the caller to stop looping.
    Returns True once a source has been processed, whether it was compared
    or excluded by the content parser.
    """
    try:
        item = self._dequeue()
    except Empty:
        self._logger.debug("Exiting: queue timed out")
        return False
    except StopIteration:
        self._logger.debug("Exiting: got stop signal")
        return False

    try:
        content = self._open_url(item)
    except ParserExclusionError:
        # The parser rejected this source: flag it and finish its work
        # without running a comparison.
        self._logger.debug("Source excluded by content parser")
        item.skipped = True
        item.excluded = True
        item.finish_work()
        return True

    # Falsy text (e.g. None or "") is compared with no chain at all.
    if content:
        chain = MarkovChain(content)
    else:
        chain = None
    item.workspace.compare(item, chain)
    return True

def _run(self): def _run(self):
"""Main entry point for the worker thread. """Main entry point for the worker thread.


@@ -211,24 +233,8 @@ class _CopyvioWorker(object):
now empty. now empty.
""" """
while True: while True:
try:
source = self._dequeue()
except Empty:
self._logger.debug("Exiting: queue timed out")
return
except StopIteration:
self._logger.debug("Exiting: got stop signal")
return

try:
text = self._open_url(source)
except ParserExclusionError:
self._logger.debug("Source excluded by content parser")
source.skipped = source.excluded = True
source.finish_work()
else:
chain = MarkovChain(text) if text else None
source.workspace.compare(source, chain)
if not self._handle_once():
break


def start(self): def start(self):
"""Start the copyvio worker in a new thread.""" """Start the copyvio worker in a new thread."""


Ładowanie…
Anuluj
Zapisz