From: a_magical_me
Date: Tue, 29 Mar 2011 03:49:17 +0000 (-0700)
Subject: Speed up `import pokedex.db` slightly.
X-Git-Tag: veekun-promotions/2011041101~6^2~14
X-Git-Url: http://git.veekun.com/zzz-pokedex.git/commitdiff_plain/6df90db0baf023ab59a21f1ca73b79fb04f9e22b?hp=fe7fe27d1e5df6918fb9cf11d64686f85948c564

Speed up `import pokedex.db` slightly.

Importing pokedex can take several seconds due to its rather large
dependencies—in particular, sqlalchemy, whoosh, and pkg_resources seem to be
the largest offenders.

Normally, it would be possible to import only the submodules one needs
(pokedex.db, say), but pokedex.__init__ brings in all the submodules, for use
by the command-line interface.

The fix is rather obvious:

 - Move the command-line stuff into pokedex.main.

   Note: because the submodules are no longer imported by default, any script
   which expects `import pokedex` to be useful will likely break.

   Note: the `pokedex` command will not work until you re-run
   `python setup.py develop`, to update entry_points.txt.

 - Don't import pkg_resources until necessary.
--- diff --git a/pokedex/__init__.py b/pokedex/__init__.py index e4397af..e69de29 100644 --- a/pokedex/__init__.py +++ b/pokedex/__init__.py @@ -1,287 +0,0 @@ -# encoding: utf8 -from optparse import OptionParser -import os -import sys - -# XXX importing pokedex.whatever should not import all these -import pokedex.db -import pokedex.db.load -import pokedex.db.tables -import pokedex.lookup -from pokedex import defaults - -def main(): - if len(sys.argv) <= 1: - command_help() - - command = sys.argv[1] - args = sys.argv[2:] - - # XXX there must be a better way to get Unicode argv - # XXX this doesn't work on Windows durp - enc = sys.stdin.encoding or 'utf8' - args = [_.decode(enc) for _ in args] - - # Find the command as a function in this file - func = globals().get("command_%s" % command, None) - if func: - func(*args) - else: - command_help() - - -def get_parser(verbose=True): - """Returns an OptionParser prepopulated with the global options. - - `verbose` is whether or not the options should be verbose by default. - """ - parser = OptionParser() - parser.add_option('-e', '--engine', dest='engine_uri', default=None) - parser.add_option('-i', '--index', dest='index_dir', default=None) - parser.add_option('-q', '--quiet', dest='verbose', default=verbose, action='store_false') - parser.add_option('-v', '--verbose', dest='verbose', default=verbose, action='store_true') - return parser - -def get_session(options): - """Given a parsed options object, connects to the database and returns a - session. 
- """ - - engine_uri = options.engine_uri - got_from = 'command line' - - if engine_uri is None: - engine_uri, got_from = defaults.get_default_db_uri_with_origin() - - session = pokedex.db.connect(engine_uri) - - if options.verbose: - print "Connected to database %(engine)s (from %(got_from)s)" \ - % dict(engine=session.bind.url, got_from=got_from) - - return session - -def get_lookup(options, session=None, recreate=False): - """Given a parsed options object, opens the whoosh index and returns a - PokedexLookup object. - """ - - if recreate and not session: - raise ValueError("get_lookup() needs an explicit session to regen the index") - - index_dir = options.index_dir - got_from = 'command line' - - if index_dir is None: - index_dir, got_from = defaults.get_default_index_dir_with_origin() - - if options.verbose: - print "Opened lookup index %(index_dir)s (from %(got_from)s)" \ - % dict(index_dir=index_dir, got_from=got_from) - - lookup = pokedex.lookup.PokedexLookup(index_dir, session=session) - - if recreate: - lookup.rebuild_index() - - return lookup - -def get_csv_directory(options): - """Prints and returns the csv directory we're about to use.""" - - if not options.verbose: - return - - csvdir = options.directory - got_from = 'command line' - - if csvdir is None: - csvdir, got_from = defaults.get_default_csv_dir_with_origin() - - print "Using CSV directory %(csvdir)s (from %(got_from)s)" \ - % dict(csvdir=csvdir, got_from=got_from) - - return csvdir - - -### Plumbing commands - -def command_dump(*args): - parser = get_parser(verbose=True) - parser.add_option('-d', '--directory', dest='directory', default=None) - options, tables = parser.parse_args(list(args)) - - session = get_session(options) - get_csv_directory(options) - - pokedex.db.load.dump(session, directory=options.directory, - tables=tables, - verbose=options.verbose) - -def command_load(*args): - parser = get_parser(verbose=True) - parser.add_option('-d', '--directory', dest='directory', 
default=None) - parser.add_option('-D', '--drop-tables', dest='drop_tables', default=False, action='store_true') - parser.add_option('-S', '--safe', dest='safe', default=False, action='store_true', - help="Do not use backend-specific optimalizations.") - options, tables = parser.parse_args(list(args)) - - if not options.engine_uri: - print "WARNING: You're reloading the default database, but not the lookup index. They" - print " might get out of sync, and pokedex commands may not work correctly!" - print "To fix this, run `pokedex reindex` when this command finishes. Or, just use" - print "`pokedex setup` to do both at once." - print - - session = get_session(options) - get_csv_directory(options) - - pokedex.db.load.load(session, directory=options.directory, - drop_tables=options.drop_tables, - tables=tables, - verbose=options.verbose, - safe=options.safe) - -def command_reindex(*args): - parser = get_parser(verbose=True) - options, _ = parser.parse_args(list(args)) - - session = get_session(options) - lookup = get_lookup(options, session=session, recreate=True) - - print "Recreated lookup index." - - -def command_setup(*args): - parser = get_parser(verbose=False) - options, _ = parser.parse_args(list(args)) - - options.directory = None - - session = get_session(options) - get_csv_directory(options) - pokedex.db.load.load(session, directory=None, drop_tables=True, - verbose=options.verbose, - safe=False) - - lookup = get_lookup(options, session=session, recreate=True) - - print "Recreated lookup index." - - -def command_status(*args): - parser = get_parser(verbose=True) - options, _ = parser.parse_args(list(args)) - options.verbose = True - options.directory = None - - # Database, and a lame check for whether it's been inited at least once - session = get_session(options) - print " - OK! Connected successfully." - - if pokedex.db.tables.Pokemon.__table__.exists(session.bind): - print " - OK! Database seems to contain some data." 
- else: - print " - WARNING: Database appears to be empty." - - # CSV; simple checks that the dir exists - csvdir = get_csv_directory(options) - if not os.path.exists(csvdir): - print " - ERROR: No such directory!" - elif not os.path.isdir(csvdir): - print " - ERROR: Not a directory!" - else: - print " - OK! Directory exists." - - if os.access(csvdir, os.R_OK): - print " - OK! Can read from directory." - else: - print " - ERROR: Can't read from directory!" - - if os.access(csvdir, os.W_OK): - print " - OK! Can write to directory." - else: - print " - WARNING: Can't write to directory! " \ - "`dump` will not work. You may need to sudo." - - # Index; the PokedexLookup constructor covers most tests and will - # cheerfully bomb if they fail - lookup = get_lookup(options, recreate=False) - print " - OK! Opened successfully." - - -### User-facing commands - -def command_lookup(*args): - parser = get_parser(verbose=False) - options, words = parser.parse_args(list(args)) - - name = u' '.join(words) - - session = get_session(options) - lookup = get_lookup(options, session=session, recreate=False) - - results = lookup.lookup(name) - if not results: - print "No matches." - elif results[0].exact: - print "Matched:" - else: - print "Fuzzy-matched:" - - for result in results: - if hasattr(result.object, 'full_name'): - name = result.object.full_name - else: - name = result.object.name - - print "%s: %s" % (result.object.__tablename__, name), - if result.language: - print "(%s in %s)" % (result.name, result.language) - else: - print - - -def command_help(): - print u"""pokedex -- a command-line Pokédex interface -usage: pokedex {command} [options...] -Run `pokedex setup` first, or nothing will work! -See http://bugs.veekun.com/projects/pokedex/wiki/CLI for more documentation. - -Commands: - help Displays this message. - lookup [thing] Look up something in the Pokédex. - -System commands: - load Load Pokédex data into a database from CSV files. 
- dump Dump Pokédex data from a database into CSV files. - reindex Rebuilds the lookup index from the database. - setup Combines load and reindex. - status No effect, but prints which engine, index, and csv - directory would be used for other commands. - -Global options: - -e|--engine=URI By default, all commands try to use a SQLite database - in the pokedex install directory. Use this option (or - a POKEDEX_DB_ENGINE environment variable) to specify an - alternate database. - -i|--index=DIR By default, all commands try to put the lookup index in - the pokedex install directory. Use this option (or a - POKEDEX_INDEX_DIR environment variable) to specify an - alternate loction. - -q|--quiet Don't print system output. This is the default for - non-system commands and setup. - -v|--verbose Print system output. This is the default for system - commands, except setup. - -System options: - -d|--directory=DIR By default, load and dump will use the CSV files in the - pokedex install directory. Use this option to specify - a different directory. - -D|--drop-tables With load, drop all tables before loading data. - - Additionally, load and dump accept a list of table names (possibly with - wildcards) and/or csv fileames as an argument list. 
-""".encode(sys.getdefaultencoding(), 'replace') - - sys.exit(0) diff --git a/pokedex/defaults.py b/pokedex/defaults.py index 7321291..b2b0adf 100644 --- a/pokedex/defaults.py +++ b/pokedex/defaults.py @@ -1,13 +1,13 @@ """ pokedex.defaults - logic for finding default paths """ import os -import pkg_resources def get_default_db_uri_with_origin(): uri = os.environ.get('POKEDEX_DB_ENGINE', None) origin = 'environment' if uri is None: + import pkg_resources sqlite_path = pkg_resources.resource_filename('pokedex', 'data/pokedex.sqlite') uri = 'sqlite:///' + sqlite_path @@ -20,6 +20,7 @@ def get_default_index_dir_with_origin(): origin = 'environment' if index_dir is None: + import pkg_resources index_dir = pkg_resources.resource_filename('pokedex', 'data/whoosh-index') origin = 'default' @@ -27,6 +28,7 @@ def get_default_index_dir_with_origin(): return index_dir, origin def get_default_csv_dir_with_origin(): + import pkg_resources csv_dir = pkg_resources.resource_filename('pokedex', 'data/csv') origin = 'default' diff --git a/pokedex/main.py b/pokedex/main.py new file mode 100644 index 0000000..e9810e5 --- /dev/null +++ b/pokedex/main.py @@ -0,0 +1,286 @@ +# encoding: utf8 +from optparse import OptionParser +import os +import sys + +import pokedex.db +import pokedex.db.load +import pokedex.db.tables +import pokedex.lookup +from pokedex import defaults + +def main(): + if len(sys.argv) <= 1: + command_help() + + command = sys.argv[1] + args = sys.argv[2:] + + # XXX there must be a better way to get Unicode argv + # XXX this doesn't work on Windows durp + enc = sys.stdin.encoding or 'utf8' + args = [_.decode(enc) for _ in args] + + # Find the command as a function in this file + func = globals().get("command_%s" % command, None) + if func: + func(*args) + else: + command_help() + + +def get_parser(verbose=True): + """Returns an OptionParser prepopulated with the global options. + + `verbose` is whether or not the options should be verbose by default. 
+ """ + parser = OptionParser() + parser.add_option('-e', '--engine', dest='engine_uri', default=None) + parser.add_option('-i', '--index', dest='index_dir', default=None) + parser.add_option('-q', '--quiet', dest='verbose', default=verbose, action='store_false') + parser.add_option('-v', '--verbose', dest='verbose', default=verbose, action='store_true') + return parser + +def get_session(options): + """Given a parsed options object, connects to the database and returns a + session. + """ + + engine_uri = options.engine_uri + got_from = 'command line' + + if engine_uri is None: + engine_uri, got_from = defaults.get_default_db_uri_with_origin() + + session = pokedex.db.connect(engine_uri) + + if options.verbose: + print "Connected to database %(engine)s (from %(got_from)s)" \ + % dict(engine=session.bind.url, got_from=got_from) + + return session + +def get_lookup(options, session=None, recreate=False): + """Given a parsed options object, opens the whoosh index and returns a + PokedexLookup object. 
+ """ + + if recreate and not session: + raise ValueError("get_lookup() needs an explicit session to regen the index") + + index_dir = options.index_dir + got_from = 'command line' + + if index_dir is None: + index_dir, got_from = defaults.get_default_index_dir_with_origin() + + if options.verbose: + print "Opened lookup index %(index_dir)s (from %(got_from)s)" \ + % dict(index_dir=index_dir, got_from=got_from) + + lookup = pokedex.lookup.PokedexLookup(index_dir, session=session) + + if recreate: + lookup.rebuild_index() + + return lookup + +def get_csv_directory(options): + """Prints and returns the csv directory we're about to use.""" + + if not options.verbose: + return + + csvdir = options.directory + got_from = 'command line' + + if csvdir is None: + csvdir, got_from = defaults.get_default_csv_dir_with_origin() + + print "Using CSV directory %(csvdir)s (from %(got_from)s)" \ + % dict(csvdir=csvdir, got_from=got_from) + + return csvdir + + +### Plumbing commands + +def command_dump(*args): + parser = get_parser(verbose=True) + parser.add_option('-d', '--directory', dest='directory', default=None) + options, tables = parser.parse_args(list(args)) + + session = get_session(options) + get_csv_directory(options) + + pokedex.db.load.dump(session, directory=options.directory, + tables=tables, + verbose=options.verbose) + +def command_load(*args): + parser = get_parser(verbose=True) + parser.add_option('-d', '--directory', dest='directory', default=None) + parser.add_option('-D', '--drop-tables', dest='drop_tables', default=False, action='store_true') + parser.add_option('-S', '--safe', dest='safe', default=False, action='store_true', + help="Do not use backend-specific optimalizations.") + options, tables = parser.parse_args(list(args)) + + if not options.engine_uri: + print "WARNING: You're reloading the default database, but not the lookup index. They" + print " might get out of sync, and pokedex commands may not work correctly!" 
+ print "To fix this, run `pokedex reindex` when this command finishes. Or, just use" + print "`pokedex setup` to do both at once." + print + + session = get_session(options) + get_csv_directory(options) + + pokedex.db.load.load(session, directory=options.directory, + drop_tables=options.drop_tables, + tables=tables, + verbose=options.verbose, + safe=options.safe) + +def command_reindex(*args): + parser = get_parser(verbose=True) + options, _ = parser.parse_args(list(args)) + + session = get_session(options) + lookup = get_lookup(options, session=session, recreate=True) + + print "Recreated lookup index." + + +def command_setup(*args): + parser = get_parser(verbose=False) + options, _ = parser.parse_args(list(args)) + + options.directory = None + + session = get_session(options) + get_csv_directory(options) + pokedex.db.load.load(session, directory=None, drop_tables=True, + verbose=options.verbose, + safe=False) + + lookup = get_lookup(options, session=session, recreate=True) + + print "Recreated lookup index." + + +def command_status(*args): + parser = get_parser(verbose=True) + options, _ = parser.parse_args(list(args)) + options.verbose = True + options.directory = None + + # Database, and a lame check for whether it's been inited at least once + session = get_session(options) + print " - OK! Connected successfully." + + if pokedex.db.tables.Pokemon.__table__.exists(session.bind): + print " - OK! Database seems to contain some data." + else: + print " - WARNING: Database appears to be empty." + + # CSV; simple checks that the dir exists + csvdir = get_csv_directory(options) + if not os.path.exists(csvdir): + print " - ERROR: No such directory!" + elif not os.path.isdir(csvdir): + print " - ERROR: Not a directory!" + else: + print " - OK! Directory exists." + + if os.access(csvdir, os.R_OK): + print " - OK! Can read from directory." + else: + print " - ERROR: Can't read from directory!" + + if os.access(csvdir, os.W_OK): + print " - OK! Can write to directory." 
+ else: + print " - WARNING: Can't write to directory! " \ + "`dump` will not work. You may need to sudo." + + # Index; the PokedexLookup constructor covers most tests and will + # cheerfully bomb if they fail + lookup = get_lookup(options, recreate=False) + print " - OK! Opened successfully." + + +### User-facing commands + +def command_lookup(*args): + parser = get_parser(verbose=False) + options, words = parser.parse_args(list(args)) + + name = u' '.join(words) + + session = get_session(options) + lookup = get_lookup(options, session=session, recreate=False) + + results = lookup.lookup(name) + if not results: + print "No matches." + elif results[0].exact: + print "Matched:" + else: + print "Fuzzy-matched:" + + for result in results: + if hasattr(result.object, 'full_name'): + name = result.object.full_name + else: + name = result.object.name + + print "%s: %s" % (result.object.__tablename__, name), + if result.language: + print "(%s in %s)" % (result.name, result.language) + else: + print + + +def command_help(): + print u"""pokedex -- a command-line Pokédex interface +usage: pokedex {command} [options...] +Run `pokedex setup` first, or nothing will work! +See http://bugs.veekun.com/projects/pokedex/wiki/CLI for more documentation. + +Commands: + help Displays this message. + lookup [thing] Look up something in the Pokédex. + +System commands: + load Load Pokédex data into a database from CSV files. + dump Dump Pokédex data from a database into CSV files. + reindex Rebuilds the lookup index from the database. + setup Combines load and reindex. + status No effect, but prints which engine, index, and csv + directory would be used for other commands. + +Global options: + -e|--engine=URI By default, all commands try to use a SQLite database + in the pokedex install directory. Use this option (or + a POKEDEX_DB_ENGINE environment variable) to specify an + alternate database. 
+ -i|--index=DIR By default, all commands try to put the lookup index in + the pokedex install directory. Use this option (or a + POKEDEX_INDEX_DIR environment variable) to specify an + alternate loction. + -q|--quiet Don't print system output. This is the default for + non-system commands and setup. + -v|--verbose Print system output. This is the default for system + commands, except setup. + +System options: + -d|--directory=DIR By default, load and dump will use the CSV files in the + pokedex install directory. Use this option to specify + a different directory. + -D|--drop-tables With load, drop all tables before loading data. + + Additionally, load and dump accept a list of table names (possibly with + wildcards) and/or csv fileames as an argument list. +""".encode(sys.getdefaultencoding(), 'replace') + + sys.exit(0) diff --git a/setup.py b/setup.py index 7527672..3bc7028 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup( entry_points = { 'console_scripts': [ - 'pokedex = pokedex:main', + 'pokedex = pokedex.main:main', ], }, )