A semantic search engine for source code https://bitshift.benkurtovic.com/
Nie możesz wybrać więcej, niż 25 tematów Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.
 
 
 
 
 
 

127 wiersze
4.7 KiB

  1. """
  2. Subpackage with classes and functions to handle communication with the MySQL
  3. database backend, which manages the search index.
  4. """
  5. import os
  6. import mmh3
  7. import oursql
  8. from .migration import VERSION, MIGRATIONS
  9. __all__ = ["Database"]
  10. class Database(object):
  11. """Represents the MySQL database."""
  12. def __init__(self, migrate=False):
  13. self._connect()
  14. self._check_version(migrate)
  15. def _connect(self):
  16. """Establish a connection to the database."""
  17. root = os.path.dirname(os.path.abspath(__file__))
  18. default_file = os.path.join(root, ".my.cnf")
  19. self._conn = oursql.connect(read_default_file=default_file,
  20. autoping=True, autoreconnect=True)
  21. def _migrate(self, cursor, current):
  22. """Migrate the database to the latest schema version."""
  23. for version in xrange(current, VERSION):
  24. print "Migrating to %d..." % version + 1
  25. for query in MIGRATIONS[version - 1]:
  26. cursor.execute(query)
  27. cursor.execute("UPDATE version SET version = ?", (version + 1,))
  28. def _check_version(self, migrate):
  29. """Check the database schema version and respond accordingly.
  30. If the schema is out of date, migrate if *migrate* is True, else raise
  31. an exception.
  32. """
  33. with self._conn.cursor() as cursor:
  34. cursor.execute("SELECT version FROM version")
  35. version = cursor.fetchone()[0]
  36. if version < VERSION:
  37. if migrate:
  38. self._migrate(cursor, version)
  39. else:
  40. err = "Database schema out of date. " \
  41. "Run `python -m bitshift.database.migration`."
  42. raise RuntimeError(err)
  43. def _decompose_url(self, url):
  44. """Break up a URL into an origin (with a URL base) and a suffix."""
  45. pass ## TODO
  46. def _insert_symbols(self, cursor, code_id, sym_type, symbols):
  47. """Insert a list of symbols of a given type into the database."""
  48. sym_types = ["functions", "classes", "variables"]
  49. query1 = "INSERT INTO symbols VALUES (?, ?, ?)"
  50. query2 = "INSERT INTO symbol_locations VALUES (?, ?, ?, ?, ?, ?)"
  51. for (name, decls, uses) in symbols:
  52. cursor.execute(query1, (code_id, sym_types.index(sym_type), name))
  53. sym_id = cursor.lastrowid
  54. params = ([tuple([sym_id, 0] + list(loc)) for loc in decls] +
  55. [tuple([sym_id, 1] + list(loc)) for loc in uses])
  56. cursor.executemany(query2, params)
  57. def close(self):
  58. """Disconnect from the database."""
  59. self._conn.close()
  60. def search(self, query, page=1):
  61. """
  62. Search the database for a query and return the *n*\ th page of results.
  63. :param query: The query to search for.
  64. :type query: :py:class:`~.query.tree.Tree`
  65. :param page: The result page to display.
  66. :type page: int
  67. :return: A list of search results.
  68. :rtype: list of :py:class:`.Codelet`\ s
  69. """
  70. # search for cache_hash = mmh3.hash(query.serialize() + str(page))
  71. # cache HIT:
  72. # update cache_last_used
  73. # return codelets
  74. # cache MISS:
  75. # build complex search query
  76. # fetch codelets
  77. # cache results
  78. # return codelets
  79. pass
  80. def insert(self, codelet):
  81. """
  82. Insert a codelet into the database.
  83. :param codelet: The codelet to insert.
  84. :type codelet: :py:class:`.Codelet`
  85. """
  86. query1 = """INSERT INTO code VALUES (?, ?)
  87. ON DUPLICATE KEY UPDATE code_id=code_id"""
  88. query2 = """INSERT INTO codelets VALUES
  89. (?, ?, ?, ?, ?, ?, ?, ?)"""
  90. query3 = "INSERT INTO authors VALUES (?, ?, ?)"
  91. code_id = mmh3.hash64(codelet.code.encode("utf8"))[0]
  92. origin, url = self._decompose_url(codelet.url)
  93. with self._conn.cursor() as cursor:
  94. cursor.execute(query1, (code_id, codelet.code))
  95. new_code = cursor.rowcount == 1
  96. cursor.execute(query2, (codelet.name, code_id, codelet.language,
  97. origin, url, codelet.rank,
  98. codelet.date_created,
  99. codelet.date_modified))
  100. codelet_id = cursor.lastrowid
  101. authors = [(codelet_id, a[0], a[1]) for a in codelet.authors]
  102. cursor.executemany(query3, authors)
  103. if new_code:
  104. for sym_type, symbols in codelet.symbols.iteritems():
  105. self._insert_symbols(cursor, code_id, sym_type, symbols)