Source code for wsgidav.samples.mysql_dav_provider

# (c) 2009-2023 Martin Wendt and contributors; see WsgiDAV https://github.com/mar10/wsgidav
# Original PyFileServer (c) 2005 Ho Chun Wei.
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
"""
Implementation of a WebDAV provider that provides a very basic, read-only
resource layer emulation of a MySQL database.

This module is specific to the WsgiDAV application. It provides a
classes ``MySQLBrowserProvider``.

Usage::

    (see docs/sample_wsgidav.yaml)
    MySQLBrowserProvider(host, user, passwd, db)

    host - host of database server
    user - user_name to access database
    passwd - passwd to access database
    db - name of database on database server

The ``MySQLBrowserProvider`` provides a very basic, read-only
resource layer emulation of a MySQL database.
It provides the following interface:

    - the root collection shared consists of collections that correspond to
      table names

    - in each table collection, there is a resource called "_ENTIRE_CONTENTS".
      This is a non-collection resource that returns a csv representation of the
      entire table

    - if the table has a single primary key, each table record will also appear
      as a non-collection resource in the table collection using the primary key
      value as its name. This resource returns a csv representation of the record
      and will also include the record attributes as live properties with
      attribute name as property name and table name suffixed with colon as the
      property namespace


This is a very basic interface and below is a by no means thorough summary of
its limitations:

    - Really only supports having numbers or strings as primary keys. The code uses
      a numeric or string comparison that may not hold up if the primary key is
      a date or some other datatype.

    - There is no handling for cases like BLOBs as primary keys or such. Well, there is
      no handling for BLOBs in general.

    - When returning contents, it buffers the entire contents! A bad way to return
      large tables. Ideally you would have a FileMixin that reads the database even
      as the application reads the file object....

    - It takes too many database queries to return information.
      Ideally there should be some sort of caching for metadata at least, to avoid
      unnecessary queries to the database.

"""
import csv
import hashlib
import time
from io import StringIO

import MySQLdb  # @UnresolvedImport

from wsgidav import util
from wsgidav.dav_error import (
    HTTP_FORBIDDEN,
    DAVError,
    PRECONDITION_CODE_ProtectedProperty,
)
from wsgidav.dav_provider import DAVProvider, _DAVResource

__docformat__ = "reStructuredText"

_logger = util.get_module_logger(__name__)


[docs] class MySQLBrowserResource(_DAVResource): """Represents a single existing DAV resource instance. See also DAVResource and MySQLBrowserProvider. """ def __init__(self, provider, path, is_collection, environ): super().__init__(path, is_collection, environ) self._cache = None def _init(self): """Read resource information into self._cache, for cached access. See DAVResource._init() """ # TODO: recalc self.path from <self._file_path>, to fix correct file system case # On windows this would lead to correct URLs self.provider._count_get_resource_inst_init += 1 tableName, primKey = self.provider._split_path(self.path) display_type = "Unknown" displayTypeComment = "" contentType = "text/html" # _logger.debug("getInfoDict(%s), nc=%s" % (path, self.connectCount)) if tableName is None: display_type = "Database" elif primKey is None: # "database" and table name display_type = "Database Table" else: contentType = "text/csv" if primKey == "_ENTIRE_CONTENTS": display_type = "Database Table Contents" displayTypeComment = "CSV Representation of Table Contents" else: display_type = "Database Record" displayTypeComment = "Attributes available as properties" # Avoid calling is_collection, since it would call isExisting -> _init_connection is_collection = primKey is None self._cache = { "content_length": None, "contentType": contentType, "created": time.time(), "display_name": self.name, "etag": hashlib.md5().update(self.path).hexdigest(), # "etag": md5.new(self.path).hexdigest(), "modified": None, "support_ranges": False, "display_info": {"type": display_type, "typeComment": displayTypeComment}, } # Some resource-only infos: if not is_collection: self._cache["modified"] = time.time() _logger.debug("---> _init, nc=%s" % self.provider._count_initConnection) def _get_info(self, info): if self._cache is None: self._init() return self._cache.get(info) # Getter methods for standard live properties
[docs] def get_content_length(self): return self._get_info("content_length")
[docs] def get_content_type(self): return self._get_info("contentType")
[docs] def get_creation_date(self): return self._get_info("created")
[docs] def get_display_name(self): return self.name
[docs] def get_display_info(self): return self._get_info("display_info")
[docs] def get_etag(self): return self._get_info("etag")
[docs] def get_last_modified(self): return self._get_info("modified")
[docs] def get_member_list(self): """Return list of (direct) collection member names (UTF-8 byte strings). See DAVResource.get_member_list() """ members = [] conn = self.provider._init_connection() try: tableName, primKey = self.provider._split_path(self.path) if tableName is None: retlist = self.provider._list_tables(conn) for name in retlist: members.append( MySQLBrowserResource( self.provider, util.join_uri(self.path, name), True, self.environ, ) ) elif primKey is None: pri_key = self.provider._find_primary_key(conn, tableName) if pri_key is not None: retlist = self.provider._list_fields(conn, tableName, pri_key) for name in retlist: members.append( MySQLBrowserResource( self.provider, util.join_uri(self.path, name), False, self.environ, ) ) members.insert( 0, MySQLBrowserResource( self.provider, util.join_uri(self.path, "_ENTIRE_CONTENTS"), False, self.environ, ), ) finally: conn.close() return members
[docs] def get_content(self): """Open content as a stream for reading. See DAVResource.get_content() """ filestream = StringIO() tableName, primKey = self.provider._split_path(self.path) if primKey is not None: conn = self.provider._init_connection() listFields = self.provider._get_field_list(conn, tableName) csvwriter = csv.DictWriter(filestream, listFields, extrasaction="ignore") dictFields = {} for field_name in listFields: dictFields[field_name] = field_name csvwriter.writerow(dictFields) if primKey == "_ENTIRE_CONTENTS": cursor = conn.cursor(MySQLdb.cursors.DictCursor) cursor.execute("SELECT * from " + self.provider._db + "." + tableName) result_set = cursor.fetchall() for row in result_set: csvwriter.writerow(row) cursor.close() else: row = self.provider._get_record_by_primary_key(conn, tableName, primKey) if row is not None: csvwriter.writerow(row) conn.close() # this suffices for small dbs, but # for a production big database, I imagine you would have a FileMixin that # does the retrieving and population even as the file object is being read filestream.seek(0) return filestream
[docs] def get_property_names(self, *, is_allprop): """Return list of supported property names in Clark Notation. Return supported live and dead properties. (See also DAVProvider.get_property_names().) In addition, all table field names are returned as properties. """ # Let default implementation return supported live and dead properties propNames = super().get_property_names(is_allprop=is_allprop) # Add fieldnames as properties tableName, primKey = self.provider._split_path(self.path) if primKey is not None: conn = self.provider._init_connection() fieldlist = self.provider._get_field_list(conn, tableName) for fieldname in fieldlist: propNames.append(f"{{{tableName}:}}{fieldname}") conn.close() return propNames
[docs] def get_property_value(self, name): """Return the value of a property. The base implementation handles: - ``{DAV:}lockdiscovery`` and ``{DAV:}supportedlock`` using the associated lock manager. - All other *live* properties (i.e. name starts with ``{DAV:}``) are delegated to self.getLivePropertyValue() - Finally, other properties are considered *dead*, and are handled using the associated property manager, if one is present. """ # Return table field as property tableName, primKey = self.provider._split_path(self.path) if primKey is not None: ns, localName = util.split_namespace(name) if ns == (tableName + ":"): conn = self.provider._init_connection() fieldlist = self.provider._get_field_list(conn, tableName) if localName in fieldlist: val = self.provider._get_field_by_primary_key( conn, tableName, primKey, localName ) conn.close() return val conn.close() # else, let default implementation return supported live and dead properties return super().get_property_value(name)
[docs] def set_property_value(self, name, value, dry_run=False): """Set or remove property value. See DAVResource.set_property_value() """ raise DAVError( HTTP_FORBIDDEN, err_condition=PRECONDITION_CODE_ProtectedProperty )
# ============================================================================ # MySQLBrowserProvider # ============================================================================
[docs] class MySQLBrowserProvider(DAVProvider): def __init__(self, host, user, passwd, db): super().__init__() self._host = host self._user = user self._passwd = passwd self._db = db self._count_initConnection = 0 def __repr__(self): return f"{self.__class__.__name__} for db '{self._db}' on '{self._host}' (user: '{self._user}')'" def _split_path(self, path): """Return (tableName, primaryKey) tuple for a request path.""" if path.strip() in (None, "", "/"): return (None, None) tableName, primKey = util.save_split(path.strip("/"), "/", 1) # _logger.debug("'%s' -> ('%s', '%s')" % (path, tableName, primKey)) return (tableName, primKey) def _init_connection(self): self._count_initConnection += 1 return MySQLdb.connect( host=self._host, user=self._user, passwd=self._passwd, db=self._db ) def _get_field_list(self, conn, table_name): retlist = [] cursor = conn.cursor(MySQLdb.cursors.DictCursor) cursor.execute("DESCRIBE " + table_name) result_set = cursor.fetchall() for row in result_set: retlist.append(row["Field"]) cursor.close() return retlist def _is_data_type_numeric(self, datatype): if datatype is None: return False # how many MySQL datatypes does it take to change a lig... I mean, store numbers numerictypes = [ "BIGINT", "INTT", "MEDIUMINT", "SMALLINT", "TINYINT", "BIT", "DEC", "DECIMAL", "DOUBLE", "FLOAT", "REAL", "DOUBLE PRECISION", "INTEGER", "NUMERIC", ] datatype = datatype.upper() for numtype in numerictypes: if datatype.startswith(numtype): return True return False def _exists_record_by_primary_key(self, conn, table_name, pri_key_value): pri_key = None pri_field_type = None cursor = conn.cursor(MySQLdb.cursors.DictCursor) cursor.execute("DESCRIBE " + table_name) result_set = cursor.fetchall() for row in result_set: if row["Key"] == "PRI": if pri_key is None: pri_key = row["Field"] pri_field_type = row["Type"] else: return False # more than one primary key - multipart key? cursor.close() isNumType = self._is_data_type_numeric(pri_field_type) cursor = conn.cursor(MySQLdb.cursors.DictCursor) if isNumType: cursor.execute( "SELECT " + pri_key + " FROM " + self._db + "." + table_name + " WHERE " + pri_key + " = " + pri_key_value ) else: cursor.execute( "SELECT " + pri_key + " FROM " + self._db + "." + table_name + " WHERE " + pri_key + " = '" + pri_key_value + "'" ) row = cursor.fetchone() if row is None: cursor.close() return False cursor.close() return True def _get_field_by_primary_key(self, conn, table_name, pri_key_value, field_name): pri_key = None pri_field_type = None cursor = conn.cursor(MySQLdb.cursors.DictCursor) cursor.execute("DESCRIBE " + table_name) result_set = cursor.fetchall() for row in result_set: if row["Key"] == "PRI": if pri_key is None: pri_key = row["Field"] pri_field_type = row["Type"] else: return None # more than one primary key - multipart key? cursor.close() isNumType = self._is_data_type_numeric(pri_field_type) cursor = conn.cursor(MySQLdb.cursors.DictCursor) if isNumType: cursor.execute( "SELECT " + field_name + " FROM " + self._db + "." + table_name + " WHERE " + pri_key + " = " + pri_key_value ) else: cursor.execute( "SELECT " + field_name + " FROM " + self._db + "." + table_name + " WHERE " + pri_key + " = '" + pri_key_value + "'" ) row = cursor.fetchone() if row is None: cursor.close() return None val = util.to_str(row[field_name]) cursor.close() return val def _get_record_by_primary_key(self, conn, table_name, pri_key_value): dictRet = {} pri_key = None pri_field_type = None cursor = conn.cursor(MySQLdb.cursors.DictCursor) cursor.execute("DESCRIBE " + table_name) result_set = cursor.fetchall() for row in result_set: if row["Key"] == "PRI": if pri_key is None: pri_key = row["Field"] pri_field_type = row["Type"] else: return None # more than one primary key - multipart key? cursor.close() isNumType = self._is_data_type_numeric(pri_field_type) cursor = conn.cursor(MySQLdb.cursors.DictCursor) if isNumType: cursor.execute( "SELECT * FROM " + self._db + "." + table_name + " WHERE " + pri_key + " = " + pri_key_value ) else: cursor.execute( "SELECT * FROM " + self._db + "." + table_name + " WHERE " + pri_key + " = '" + pri_key_value + "'" ) row = cursor.fetchone() if row is None: cursor.close() return None for fname in row.keys(): dictRet[fname] = util.to_str(row[fname]) cursor.close() return dictRet def _find_primary_key(self, conn, table_name): pri_key = None cursor = conn.cursor(MySQLdb.cursors.DictCursor) cursor.execute("DESCRIBE " + table_name) result_set = cursor.fetchall() for row in result_set: fieldname = row["Field"] keyvalue = row["Key"] if keyvalue == "PRI": if pri_key is None: pri_key = fieldname else: return None # more than one primary key - multipart key? cursor.close() return pri_key def _list_fields(self, conn, table_name, field_name): retlist = [] cursor = conn.cursor(MySQLdb.cursors.DictCursor) cursor.execute("SELECT " + field_name + " FROM " + self._db + "." + table_name) result_set = cursor.fetchall() for row in result_set: retlist.append(util.to_str(row[field_name])) cursor.close() return retlist def _list_tables(self, conn): retlist = [] cursor = conn.cursor() cursor.execute("SHOW TABLES") result_set = cursor.fetchall() for row in result_set: retlist.append("%s" % (row[0])) cursor.close() return retlist
[docs] def get_resource_inst(self, path, environ): """Return info dictionary for path. See get_resource_inst() """ # TODO: calling exists() makes directory browsing VERY slow. # At least compared to PyFileServer, which simply used string # functions to get display_type and displayTypeComment self._count_get_resource_inst += 1 if not self.exists(path, environ): return None _tableName, primKey = self._split_path(path) is_collection = primKey is None return MySQLBrowserResource(self, path, is_collection, environ)
[docs] def exists(self, path, environ): tableName, primKey = self._split_path(path) if tableName is None: return True try: conn = None conn = self._init_connection() # Check table existence: tbllist = self._list_tables(conn) if tableName not in tbllist: return False # Check table key existence: if primKey and primKey != "_ENTIRE_CONTENTS": return self._exists_record_by_primary_key(conn, tableName, primKey) return True finally: if conn: conn.close()
[docs] def is_collection(self, path, environ): _tableName, primKey = self._split_path(path) return self.exists(path, environ) and primKey is None