Browse Source

Implement missing cases and close a few Unicode loopholes.

pull/24/head
Ben Kurtovic 12 years ago
parent
commit
f644208f05
2 changed files with 22 additions and 17 deletions
  1. +2
    -1
      build.py
  2. +20
    -16
      pages/copyvios.mako

+ 2
- 1
build.py View File

@@ -15,7 +15,8 @@ from mako.lookup import TemplateLookup


def myapp(environ, start_response): def myapp(environ, start_response):
start_response("200 OK", [("Content-Type", "text/html")]) start_response("200 OK", [("Content-Type", "text/html")])
lookup = TemplateLookup(directories=["{{pages_dir}}"])
lookup = TemplateLookup(directories=["{{pages_dir}}"],
input_encoding="utf8")
template = Template(filename="{{src}}", module_directory="{{temp_dir}}", template = Template(filename="{{src}}", module_directory="{{temp_dir}}",
lookup=lookup, format_exceptions=True) lookup=lookup, format_exceptions=True)
return [template.render(environ=environ).encode("utf8")] return [template.render(environ=environ).encode("utf8")]


+ 20
- 16
pages/copyvios.mako View File

@@ -28,10 +28,11 @@
# if query.get("nocache") or not result: # if query.get("nocache") or not result:
# result = get_fresh_results(page, conn) # result = get_fresh_results(page, conn)
mc1 = __import__("earwigbot").wiki.copyvios.MarkovChain(page.get()) mc1 = __import__("earwigbot").wiki.copyvios.MarkovChain(page.get())
mc2 = __import__("earwigbot").wiki.copyvios.MarkovChain("This is some random textual content for a page.")
mc2 = __import__("earwigbot").wiki.copyvios.MarkovChain(u"This is some random textual content for a page.")
mci = __import__("earwigbot").wiki.copyvios.MarkovChainIntersection(mc1, mc2) mci = __import__("earwigbot").wiki.copyvios.MarkovChainIntersection(mc1, mc2)
result = __import__("earwigbot").wiki.copyvios.CopyvioCheckResult( result = __import__("earwigbot").wiki.copyvios.CopyvioCheckResult(
True, 0.67123, "http://example.com/", 7, mc1, (mc2, mci)) True, 0.67123, "http://example.com/", 7, mc1, (mc2, mci))
# END TEST BLOCK
return page, result return page, result


def get_site(bot, lang, project, all_projects): def get_site(bot, lang, project, all_projects):
@@ -129,18 +130,15 @@
query1 = "SELECT update_time FROM updates WHERE update_service = ?" query1 = "SELECT update_time FROM updates WHERE update_service = ?"
query2 = "SELECT lang_code, lang_name FROM languages" query2 = "SELECT lang_code, lang_name FROM languages"
query3 = "SELECT project_code, project_name FROM projects" query3 = "SELECT project_code, project_name FROM projects"

with conn.cursor() as cursor: with conn.cursor() as cursor:
cursor.execute(query1, ("sites",)) cursor.execute(query1, ("sites",))
time_since_update = int(time() - cursor.fetchall()[0][0]) time_since_update = int(time() - cursor.fetchall()[0][0])
if time_since_update > max_staleness: if time_since_update > max_staleness:
update_sites(bot, cursor) update_sites(bot, cursor)

cursor.execute(query2) cursor.execute(query2)
langs = cursor.fetchall() langs = cursor.fetchall()
cursor.execute(query3) cursor.execute(query3)
projects = cursor.fetchall() projects = cursor.fetchall()

return langs, projects return langs, projects


def update_sites(site, cursor): def update_sites(site, cursor):
@@ -152,8 +150,8 @@
projects.add(("wikimedia", "Wikimedia")) projects.add(("wikimedia", "Wikimedia"))
for special in site: for special in site:
if "closed" not in special and "private" not in special: if "closed" not in special and "private" not in special:
code = special["dbname"]
name = special["code"].capitalize()
code = special["dbname"].encode("utf8")
name = special["code"].encode("utf8").capitalize()
languages.add((code, name)) languages.add((code, name))
this = set() this = set()
for web in site["site"]: for web in site["site"]:
@@ -293,9 +291,9 @@
<select name="lang"> <select name="lang">
% for code, name in all_langs: % for code, name in all_langs:
% if code == selected_lang: % if code == selected_lang:
<option value="${code}" selected="selected">${name}</option>
<option value="${code.decode('utf8')}" selected="selected">${name.decode("utf8")}</option>
% else: % else:
<option value="${code}">${name}</option>
<option value="${code.decode('utf8')}">${name.decode("utf8")}</option>
% endif % endif
% endfor % endfor
</select> </select>
@@ -303,9 +301,9 @@
<select name="project"> <select name="project">
% for code, name in all_projects: % for code, name in all_projects:
% if code == selected_project: % if code == selected_project:
<option value="${code}" selected="selected">${name}</option>
<option value="${code.decode('utf8')}" selected="selected">${name.decode("utf8")}</option>
% else: % else:
<option value="${code}">${name}</option>
<option value="${code.decode('utf8')}">${name.decode("utf8")}</option>
% endif % endif
% endfor % endfor
</select> </select>
@@ -314,7 +312,7 @@
<tr> <tr>
<td>Page title:</td> <td>Page title:</td>
% if page: % if page:
<td><input type="text" name="title" size="60" value="${page.title() | h}" /></td>
<td><input type="text" name="title" size="60" value="${page.title | h}" /></td>
% elif title: % elif title:
<td><input type="text" name="title" size="60" value="${title | h}" /></td> <td><input type="text" name="title" size="60" value="${title | h}" /></td>
% else: % else:
@@ -345,23 +343,29 @@
</table> </table>
</form> </form>
% if project and lang and title and not page: % if project and lang and title and not page:
CASE WHEN GIVEN SITE DOESN'T EXIST
<div class="divider"></div>
<div id="cv-result-yes">
<p>The given site, (project=<b><tt>${project.decode("utf8")}</tt></b>, language=<b><tt>${lang.decode("utf8")}</tt></b>) doesn't seem to exist. <a href="//${lang.decode('utf8')}.${project.decode('utf8')}.org/">Check its URL?</a></p>
</div>
% elif project and lang and title and page and not result: % elif project and lang and title and page and not result:
CASE WHEN GIVEN PAGE DOESN'T EXIST
<div class="divider"></div>
<div id="cv-result-yes">
<p>The given page, <a href="${page.url}">${page.title | h}</a>, doesn't seem to exist.</p>
</div>
% elif page: % elif page:
<div class="divider"></div> <div class="divider"></div>
<div id="cv-result-${'yes' if result.violation else 'no'}"> <div id="cv-result-${'yes' if result.violation else 'no'}">
% if result.violation: % if result.violation:
<h2 id="cv-result-header"><a href="${page.url()}">${page.title() | h}</a> is a suspected violation of <a href="${result.url | h}">${result.url | urlstrip}</a>.</h2>
<h2 id="cv-result-header"><a href="${page.url}">${page.title | h}</a> is a suspected violation of <a href="${result.url | h}">${result.url | urlstrip}</a>.</h2>
% else: % else:
<h2 id="cv-result-header">No violations detected in <a href="${page.url()}">${page.title() | h}</a>.</h2>
<h2 id="cv-result-header">No violations detected in <a href="${page.url()}">${page.title | h}</a>.</h2>
% endif % endif
<ul id="cv-result-list"> <ul id="cv-result-list">
<li><b><tt>${round(result.confidence * 100, 1)}%</tt></b> confidence of a violation.</li> <li><b><tt>${round(result.confidence * 100, 1)}%</tt></b> confidence of a violation.</li>
% if result.cached: % if result.cached:
<li>Results are <a id="cv-cached" href="#">cached <li>Results are <a id="cv-cached" href="#">cached
<span>To save time (and money), this tool will retain the results of checks for up to 72 hours. This includes the URL of the "violated" source, but neither its content nor the content of the article. Future checks on the same page (assuming it remains unchanged) will not involve additional search queries, but a fresh comparison against the source URL will be made. If the page is modified, a new check will be run.</span> <span>To save time (and money), this tool will retain the results of checks for up to 72 hours. This includes the URL of the "violated" source, but neither its content nor the content of the article. Future checks on the same page (assuming it remains unchanged) will not involve additional search queries, but a fresh comparison against the source URL will be made. If the page is modified, a new check will be run.</span>
</a> from ${result.cache_time} (${result.cache_age} ago). <a href="${environ['REQUEST_URI'] | h}&amp;nocache=1">Bypass the cache.</a></li>
</a> from ${result.cache_time} (${result.cache_age} ago). <a href="${environ['REQUEST_URI'].decode("utf8") | h}&amp;nocache=1">Bypass the cache.</a></li>
% else: % else:
<li>Results generated in <tt>${round(result.tdiff, 3)}</tt> seconds using <tt>${result.queries}</tt> queries.</li> <li>Results generated in <tt>${round(result.tdiff, 3)}</tt> seconds using <tt>${result.queries}</tt> queries.</li>
% endif % endif


Loading…
Cancel
Save