// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
//
// This program is made available under the GNU GPL version 2.0 or
// greater. See the accompanying file COPYING for details.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE.

#include "base.hh"
#include <boost/tokenizer.hpp>
#include "lexical_cast.hh"
#include "sqlite/sqlite3.h"
#include <cstring>

#include "sanity.hh"
#include "schema_migration.hh"
#include "app_state.hh"
#include "keys.hh"
#include "transforms.hh"
#include "ui.hh"

using std::string;

// this file knows how to migrate databases from one schema to another.
// the general strategy is to hash each schema we ever use, and make a
// list of the SQL commands required to get from one hash value to the
// next.  when you do a migration, the migrator locates your current db's
// state on the list and then runs all the migration functions between
// that point and the target of the migration.

// you will notice a little bit of duplicated code between here and
// database.cc; this was originally to facilitate inclusion of migration
// capability into the depot code, but is now preserved because the code
// in this file is easier to write and understand if it speaks directly
// to sqlite.

// Wrappers around the bare sqlite3 API.  We do not use sqlite3_exec
// because we want the better error handling that sqlite3_prepare_v2
// gives us.

void
assert_sqlite3_ok(sqlite3 * db)
{
  int errcode = sqlite3_errcode(db);

  if (errcode == SQLITE_OK)
    return;

  char const * errmsg = sqlite3_errmsg(db);

  // first log the code so we can find out what the confusing code was...
  // note that the code does not uniquely identify the errmsg, unlike
  // errno values.
  L(FL("sqlite error: %d: %s") % errcode % errmsg);

  // Check the string to see if it looks like an informative_failure
  // thrown from within an SQL extension function, caught, and turned
  // into a call to sqlite3_result_error.  (Extension functions have to
  // do this to avoid corrupting sqlite's internal state.)  If it is,
  // rethrow it rather than feeding it to E(), lest we get "error:
  // sqlite error: error: " ugliness.
  char const * pfx = _("error: ");
  if (!std::strncmp(errmsg, pfx, strlen(pfx)))
    throw informative_failure(errmsg);

  // sometimes sqlite is not very helpful, so we keep a table of errors
  // people have gotten and more helpful versions
  char const * auxiliary_message = "";
  switch (errcode)
    {
      // All memory-exhaustion conditions should give the same diagnostic.
    case SQLITE_NOMEM:
      throw std::bad_alloc();

      // These diagnostics generally indicate an operating-system-level
      // failure.  It would be nice to throw strerror(errno) in there but
      // we cannot assume errno is still valid by the time we get here.
    case SQLITE_IOERR:
    case SQLITE_CANTOPEN:
    case SQLITE_PROTOCOL:
      auxiliary_message
        = _("make sure database and containing directory are writeable\n"
            "and you have not run out of disk space");
      break;

      // These error codes may indicate someone is trying to load a
      // database so old that it's in sqlite 2's disk format (monotone
      // 0.16 or older).
    case SQLITE_CORRUPT:
    case SQLITE_NOTADB:
      auxiliary_message
        = _("(if this is a database last used by monotone 0.16 or older,\n"
            "you must follow a special procedure to make it usable again.\n"
            "see the file UPGRADE, in the distribution, for instructions.)");
      break;

    default:
      break;
    }

  // if the auxiliary message is empty, the \n will be stripped off too
  E(false, F("sqlite error: %s\n%s") % errmsg % auxiliary_message);
}

namespace
{
  struct sql
  {
    sql(sqlite3 * db, int cols, char const * cmd, char const ** afterp = 0)
      : stmt(0), ncols(cols)
    {
      sqlite3_stmt * s;
      char const * after;
      L(FL("executing SQL '%s'") % cmd);

      sqlite3_prepare_v2(db, cmd, strlen(cmd), &s, &after);
      assert_sqlite3_ok(db);
      I(s);
      if (afterp)
        *afterp = after;
      else
        I(*after == 0);

      I(sqlite3_column_count(s) == ncols);
      stmt = s;
    }
    ~sql()
    {
      if (stmt)
        sqlite3_finalize(stmt);
    }

    bool step()
    {
      int res = sqlite3_step(stmt);
      if (res == SQLITE_ROW)
        return true;
      if (res == SQLITE_DONE)
        {
          L(FL("success"));
          return false;
        }

      // Diagnostics from sqlite3_result_error show up in sqlite3_errmsg
      // only after sqlite3_finalize or sqlite3_reset are called on the
      // stmt object.  See SQLite ticket #1640.
      sqlite3 * db = sqlite3_db_handle(stmt);
      sqlite3_finalize(stmt);
      stmt = 0;
      assert_sqlite3_ok(db);
      I(false);
    }
    int column_int(int col)
    {
      I(col >= 0 && col < ncols);
      return sqlite3_column_int(stmt, col);
    }
    string column_string(int col)
    {
      I(col >= 0 && col < ncols);
      return string(reinterpret_cast<char const *>
                    (sqlite3_column_text(stmt, col)));
    }
    bool column_nonnull(int col)
    {
      I(col >= 0 && col < ncols);
      return sqlite3_column_type(stmt, col) != SQLITE_NULL;
    }

    // convenience for executing a sequence of sql statements,
    // none of which returns any rows.
    static void exec(sqlite3 * db, char const * cmd)
    {
      do
        {
          sql stmt(db, 0, cmd, &cmd);
          I(stmt.step() == false);
        }
      while (*cmd != '\0');
    }

    // convenience for evaluating an expression that returns a single
    // number.
    static int value(sqlite3 * db, char const * cmd)
    {
      sql stmt(db, 1, cmd);

      I(stmt.step() == true);
      int res = stmt.column_int(0);
      I(stmt.step() == false);

      return res;
    }

    // convenience for registering SQL extension functions
    static void create_function(sqlite3 * db, char const * name,
                                void (*fn)(sqlite3_context *,
                                           int, sqlite3_value **))
    {
      sqlite3_create_function(db, name, -1, SQLITE_UTF8, 0, fn, 0, 0);
      assert_sqlite3_ok(db);
    }

  private:
    sqlite3_stmt * stmt;
    int ncols;
  };

  struct transaction
  {
    transaction(sqlite3 * s)
      : db(s), committed(false)
    {
      sql::exec(db, "BEGIN EXCLUSIVE");
    }

    void commit()
    {
      I(committed == false);
      committed = true;
    }

    ~transaction()
    {
      if (committed)
        sql::exec(db, "COMMIT");
      else
        sql::exec(db, "ROLLBACK");
    }

  private:
    sqlite3 * db;
    bool committed;
  };
} // anonymous namespace
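
// A minimal sketch of how the wrappers above are meant to be used
// together (illustrative only; "revisions" is just an example table, and
// this function exists nowhere in the tree):
//
//   static void example(sqlite3 * db)
//   {
//     transaction guard(db);          // issues BEGIN EXCLUSIVE
//     sql::exec(db, "DELETE FROM revisions;");
//     I(sql::value(db, "SELECT COUNT(*) FROM revisions") == 0);
//     guard.commit();                 // destructor will now COMMIT;
//   }                                 // otherwise it would ROLLBACK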
// SQL extension functions.

// sqlite3_value_text returns unsigned char const *, which is inconvenient
inline char const *
sqlite3_value_cstr(sqlite3_value * arg)
{
  return reinterpret_cast<char const *>(sqlite3_value_text(arg));
}

inline bool
is_ws(char c)
{
  return c == '\r' || c == '\n' || c == '\t' || c == ' ';
}

static void
sqlite_sha1_fn(sqlite3_context * f, int nargs, sqlite3_value ** args)
{
  if (nargs < 1)
    {
      sqlite3_result_error(f, "need at least 1 arg to sha1()", -1);
      return;
    }

  string tmp;
  if (nargs == 1)
    {
      char const * s = sqlite3_value_cstr(args[0]);
      char const * end = s + sqlite3_value_bytes(args[0]) - 1;
      remove_copy_if(s, end, back_inserter(tmp), is_ws);
    }
  else
    {
      char const * sep = sqlite3_value_cstr(args[0]);

      for (int i = 1; i < nargs; ++i)
        {
          if (i > 1)
            tmp += sep;
          char const * s = sqlite3_value_cstr(args[i]);
          char const * end = s + sqlite3_value_bytes(args[i]) - 1;
          remove_copy_if(s, end, back_inserter(tmp), is_ws);
        }
    }

  hexenc<id> sha;
  calculate_ident(data(tmp), sha);
  sqlite3_result_text(f, sha().c_str(), sha().size(), SQLITE_TRANSIENT);
}

static void
sqlite3_unbase64_fn(sqlite3_context * f, int nargs, sqlite3_value ** args)
{
  if (nargs != 1)
    {
      sqlite3_result_error(f, "need exactly 1 arg to unbase64()", -1);
      return;
    }
  data decoded;

  // This operation may throw informative_failure.  We must intercept that
  // and turn it into a call to sqlite3_result_error, or rollback will
  // fail.
  try
    {
      decode_base64(base64<data>(string(sqlite3_value_cstr(args[0]))),
                    decoded);
    }
  catch (informative_failure & e)
    {
      sqlite3_result_error(f, e.what(), -1);
      return;
    }
  sqlite3_result_blob(f, decoded().c_str(), decoded().size(),
                      SQLITE_TRANSIENT);
}

// Here are all of the migration steps.  Almost all of them can be
// expressed entirely as a series of SQL statements; those statements are
// packaged into a long, continued string constant for the step.  One step
// requires a function instead.
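
// For instance (purely hypothetical -- no such table or step exists), a
// step that re-encodes one column of a table "example" would be packaged
// as a single continued string constant, with a matching entry appended
// to the migration_events table further down:
//
//   char const migrate_example_step[] =
//     "ALTER TABLE example RENAME TO tmp;"
//     "CREATE TABLE example ( id primary key, val not null );"
//     "INSERT INTO example SELECT id, unbase64(val) FROM tmp;"
//     "DROP TABLE tmp;"
//     ;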
char const migrate_merge_url_and_group[] =
  // migrate the posting_queue table
  "ALTER TABLE posting_queue RENAME TO tmp;"
  "CREATE TABLE posting_queue"
  " ( url not null, -- URL we are going to send this to\n"
  "   content not null -- the packets we're going to send\n"
  " );"
  "INSERT INTO posting_queue"
  " SELECT (url || '/' || groupname), content FROM tmp;"
  "DROP TABLE tmp;"

  // migrate the incoming_queue table
  "ALTER TABLE incoming_queue RENAME TO tmp;"
  "CREATE TABLE incoming_queue "
  " ( url not null, -- URL we got this bundle from\n"
  "   content not null -- the packets we're going to read\n"
  " );"
  "INSERT INTO incoming_queue"
  " SELECT (url || '/' || groupname), content FROM tmp;"
  "DROP TABLE tmp;"

  // migrate the sequence_numbers table
  "ALTER TABLE sequence_numbers RENAME TO tmp;"
  "CREATE TABLE sequence_numbers "
  " ( url primary key, -- URL to read from\n"
  "   major not null, -- 0 in news servers, may be higher in depots\n"
  "   minor not null -- last article / packet sequence number we got\n"
  " );"
  "INSERT INTO sequence_numbers"
  " SELECT (url || '/' || groupname), major, minor FROM tmp;"
  "DROP TABLE tmp;"

  // migrate the netserver_manifests table
  "ALTER TABLE netserver_manifests RENAME TO tmp;"
  "CREATE TABLE netserver_manifests"
  " ( url not null, -- url of some server\n"
  "   manifest not null, -- manifest which exists on url\n"
  "   unique(url, manifest)"
  " );"
  "INSERT INTO netserver_manifests"
  " SELECT (url || '/' || groupname), manifest FROM tmp;"
  "DROP TABLE tmp;"
  ;

char const migrate_add_hashes_and_merkle_trees[] =
  // add the column to manifest_certs
  "ALTER TABLE manifest_certs RENAME TO tmp;"
  "CREATE TABLE manifest_certs"
  " ( hash not null unique, -- hash of remaining fields separated by \":\"\n"
  "   id not null, -- joins with manifests.id or manifest_deltas.id\n"
  "   name not null, -- opaque string chosen by user\n"
  "   value not null, -- opaque blob\n"
  "   keypair not null, -- joins with public_keys.id\n"
  "   signature not null, -- RSA/SHA1 signature of \"[name@id:val]\"\n"
  "   unique(name, id, value, keypair, signature)"
  " );"
  "INSERT INTO manifest_certs"
  " SELECT sha1(':', id, name, value, keypair, signature),"
  "        id, name, value, keypair, signature"
  " FROM tmp;"
  "DROP TABLE tmp;"

  // add the column to file_certs
  "ALTER TABLE file_certs RENAME TO tmp;"
  "CREATE TABLE file_certs"
  " ( hash not null unique, -- hash of remaining fields separated by \":\"\n"
  "   id not null, -- joins with files.id or file_deltas.id\n"
  "   name not null, -- opaque string chosen by user\n"
  "   value not null, -- opaque blob\n"
  "   keypair not null, -- joins with public_keys.id\n"
  "   signature not null, -- RSA/SHA1 signature of \"[name@id:val]\"\n"
  "   unique(name, id, value, keypair, signature)"
  " );"
  "INSERT INTO file_certs"
  " SELECT sha1(':', id, name, value, keypair, signature),"
  "        id, name, value, keypair, signature"
  " FROM tmp;"
  "DROP TABLE tmp;"

  // add the column to public_keys
  "ALTER TABLE public_keys RENAME TO tmp;"
  "CREATE TABLE public_keys"
  " ( hash not null unique, -- hash of remaining fields separated by \":\"\n"
  "   id primary key, -- key identifier chosen by user\n"
  "   keydata not null -- RSA public params\n"
  " );"
  "INSERT INTO public_keys SELECT sha1(':',id,keydata), id, keydata FROM tmp;"
  "DROP TABLE tmp;"

  // add the column to private_keys
  "ALTER TABLE private_keys RENAME TO tmp;"
  "CREATE TABLE private_keys"
  " ( hash not null unique, -- hash of remaining fields separated by \":\"\n"
  "   id primary key, -- as in public_keys (same identifiers, in fact)\n"
  "   keydata not null -- encrypted RSA private params\n"
  " );"
  "INSERT INTO private_keys SELECT sha1(':',id,keydata), id, keydata FROM tmp;"
  "DROP TABLE tmp;"

  // add the merkle tree stuff
  "CREATE TABLE merkle_nodes"
  " ( type not null, -- \"key\", \"mcert\", \"fcert\", \"manifest\"\n"
  "   collection not null, -- name chosen by user\n"
  "   level not null, -- tree level this prefix encodes\n"
  "   prefix not null, -- label identifying node in tree\n"
  "   body not null, -- binary, base64'ed node contents\n"
  "   unique(type, collection, level, prefix)"
  ");"
  ;

char const migrate_to_revisions[] =
  "DROP TABLE schema_version;"
  "DROP TABLE posting_queue;"
  "DROP TABLE incoming_queue;"
  "DROP TABLE sequence_numbers;"
  "DROP TABLE file_certs;"
  "DROP TABLE netserver_manifests;"
  "DROP TABLE merkle_nodes;"

  "CREATE TABLE merkle_nodes"
  " ( type not null, -- \"key\", \"mcert\", \"fcert\", \"rcert\"\n"
  "   collection not null, -- name chosen by user\n"
  "   level not null, -- tree level this prefix encodes\n"
  "   prefix not null, -- label identifying node in tree\n"
  "   body not null, -- binary, base64'ed node contents\n"
  "   unique(type, collection, level, prefix)"
  " );"

  "CREATE TABLE revision_certs"
  " ( hash not null unique, -- hash of remaining fields separated by \":\"\n"
  "   id not null, -- joins with revisions.id\n"
  "   name not null, -- opaque string chosen by user\n"
  "   value not null, -- opaque blob\n"
  "   keypair not null, -- joins with public_keys.id\n"
  "   signature not null, -- RSA/SHA1 signature of \"[name@id:val]\"\n"
  "   unique(name, id, value, keypair, signature)"
  " );"

  "CREATE TABLE revisions"
  " ( id primary key, -- SHA1(text of revision)\n"
  "   data not null -- compressed, encoded contents of a revision\n"
  " );"

  "CREATE TABLE revision_ancestry"
  " ( parent not null, -- joins with revisions.id\n"
  "   child not null, -- joins with revisions.id\n"
  "   unique(parent, child)"
  " );"
  ;

char const migrate_to_epochs[] =
  "DROP TABLE merkle_nodes;"

  "CREATE TABLE branch_epochs\n"
  " ( hash not null unique, -- hash of remaining fields separated by \":\"\n"
  "   branch not null unique, -- joins with revision_certs.value\n"
  "   epoch not null -- random hex-encoded id\n"
  " );"
  ;

char const migrate_to_vars[] =
  "CREATE TABLE db_vars\n"
  " ( domain not null, -- scope of application of a var\n"
  "   name not null, -- var key\n"
  "   value not null, -- var value\n"
  "   unique(domain, name)"
  " );"
  ;

char const migrate_add_indexes[] =
  "CREATE INDEX revision_ancestry__child ON revision_ancestry (child);"
  "CREATE INDEX revision_certs__id ON revision_certs (id);"
  "CREATE INDEX revision_certs__name_value ON revision_certs (name, value);"
  ;

// There is, perhaps, an argument for turning the logic inside the
// while-loop into a callback function like unbase64().  We would then not
// need a special case for this step in the master migration loop.
// However, we'd have to get the app_state in there somehow, we might in
// the future need to do other things that can't be easily expressed in
// pure SQL, and besides I think it's clearer this way.
static void
migrate_to_external_privkeys(sqlite3 * db, app_state & app)
{
  {
    sql stmt(db, 3,
             "SELECT private_keys.id, private_keys.keydata,"
             "       public_keys.keydata"
             " FROM private_keys LEFT OUTER JOIN public_keys"
             " ON private_keys.id = public_keys.id");

    while (stmt.step())
      {
        rsa_keypair_id ident(stmt.column_string(0));
        base64< arc4<rsa_priv_key> > old_priv(stmt.column_string(1));

        keypair kp;
        migrate_private_key(app, ident, old_priv, kp);
        MM(kp.pub);

        if (stmt.column_nonnull(2))
          {
            base64< rsa_pub_key > pub(stmt.column_string(2));
            MM(pub);
            N(keys_match(ident, pub, ident, kp.pub),
              F("public and private keys for %s don't match") % ident);
          }

        P(F("moving key '%s' from database to %s")
          % ident % app.keys.get_key_dir());
        app.keys.put_key_pair(ident, kp);
      }
  }

  sql::exec(db, "DROP TABLE private_keys;");
}

char const migrate_add_rosters[] =
  "CREATE TABLE rosters"
  " ( id primary key, -- strong hash of the roster\n"
  "   data not null -- compressed, encoded contents of the roster\n"
  " );"

  "CREATE TABLE roster_deltas"
  " ( id not null, -- strong hash of the roster\n"
  "   base not null, -- joins with either rosters.id or roster_deltas.id\n"
  "   delta not null, -- rdiff to construct current from base\n"
  "   unique(id, base)"
  " );"

  "CREATE TABLE revision_roster"
  " ( rev_id primary key, -- joins with revisions.id\n"
  "   roster_id not null -- joins with either rosters.id or roster_deltas.id\n"
  " );"

  "CREATE TABLE next_roster_node_number"
  " ( node primary key -- only one entry in this table, ever\n"
  " );"
  ;

// I wish I had a form of ALTER TABLE COMMENT in sqlite3
char const migrate_files_BLOB[] =
  // change the encoding of file(_delta)s
  "ALTER TABLE files RENAME TO tmp;"
  "CREATE TABLE files"
  " ( id primary key, -- strong hash of file contents\n"
  "   data not null -- compressed contents of a file\n"
  " );"
  "INSERT INTO files SELECT id, unbase64(data) FROM tmp;"
  "DROP TABLE tmp;"

  "ALTER TABLE file_deltas RENAME TO tmp;"
  "CREATE TABLE file_deltas"
  " ( id not null, -- strong hash of file contents\n"
  "   base not null, -- joins with files.id or file_deltas.id\n"
  "   delta not null, -- compressed rdiff to construct current from base\n"
  "   unique(id, base)"
  " );"
  "INSERT INTO file_deltas SELECT id, base, unbase64(delta) FROM tmp;"
  "DROP TABLE tmp;"

  // migrate other contents which are accessed by get|put_version
  "UPDATE manifests SET data=unbase64(data);"
  "UPDATE manifest_deltas SET delta=unbase64(delta);"
  "UPDATE rosters SET data=unbase64(data);"
  "UPDATE roster_deltas SET delta=unbase64(delta);"
  "UPDATE db_vars SET value=unbase64(value), name=unbase64(name);"
  "UPDATE public_keys SET keydata=unbase64(keydata);"
  "UPDATE revision_certs SET value=unbase64(value),"
  "                          signature=unbase64(signature);"
  "UPDATE manifest_certs SET value=unbase64(value),"
  "                          signature=unbase64(signature);"
  "UPDATE revisions SET data=unbase64(data);"
  "UPDATE branch_epochs SET branch=unbase64(branch);"
  ;

char const migrate_rosters_no_hash[] =
  "DROP TABLE rosters;"
  "DROP TABLE roster_deltas;"
  "DROP TABLE revision_roster;"

  "CREATE TABLE rosters"
  " ( id primary key, -- a revision id\n"
  "   checksum not null, -- checksum of 'data', to protect against"
  "                         disk corruption\n"
  "   data not null -- compressed, encoded contents of the roster\n"
  " );"

  "CREATE TABLE roster_deltas"
  " ( id primary key, -- a revision id\n"
  "   checksum not null, -- checksum of 'delta', to protect against"
  "                         disk corruption\n"
  "   base not null, -- joins with either rosters.id or roster_deltas.id\n"
  "   delta not null -- rdiff to construct current from base\n"
  " );"
  ;

char const migrate_add_heights[] =
  "CREATE TABLE heights"
  " ( revision not null, -- joins with revisions.id\n"
  "   height not null, -- complex height, array of big endian u32 integers\n"
  "   unique(revision, height)"
  " );"
  ;

char const migrate_add_heights_index[] =
  "CREATE INDEX heights__height ON heights (height);"
  ;

// this is a function because it has to refer to the numeric constant
// defined in schema_migration.hh.
static void
migrate_add_ccode(sqlite3 * db, app_state &)
{
  string cmd = "PRAGMA user_version = ";
  cmd += boost::lexical_cast<string>(mtn_creator_code);
  sql::exec(db, cmd.c_str());
}

// these must be listed in order so that ones listed earlier override ones
// listed later
enum upgrade_regime
{
  upgrade_changesetify,
  upgrade_rosterify,
  upgrade_regen_caches,
  upgrade_none,
};

static void
dump(enum upgrade_regime const & regime, string & out)
{
  switch (regime)
    {
    case upgrade_changesetify: out = "upgrade_changesetify"; break;
    case upgrade_rosterify: out = "upgrade_rosterify"; break;
    case upgrade_regen_caches: out = "upgrade_regen_caches"; break;
    case upgrade_none: out = "upgrade_none"; break;
    default: out = (FL("upgrade_regime(%d)") % regime).str(); break;
    }
}

typedef void (*migrator_cb)(sqlite3 *, app_state &);

// Exactly one of migrator_sql and migrator_func should be non-null in
// all entries in migration_events, except the very last.
struct migration_event
{
  char const * id;
  char const * migrator_sql;
  migrator_cb migrator_func;
  upgrade_regime regime;
};

// IMPORTANT: whenever you modify this to add a new schema version, you
// must also add a new migration test for the new schema version.  See
// tests/schema_migration for details.

const migration_event migration_events[] = {
  { "edb5fa6cef65bcb7d0c612023d267c3aeaa1e57a",
    migrate_merge_url_and_group, 0, upgrade_none },

  { "f042f3c4d0a4f98f6658cbaf603d376acf88ff4b",
    migrate_add_hashes_and_merkle_trees, 0, upgrade_none },

  { "8929e54f40bf4d3b4aea8b037d2c9263e82abdf4",
    migrate_to_revisions, 0, upgrade_changesetify },

  { "c1e86588e11ad07fa53e5d294edc043ce1d4005a",
    migrate_to_epochs, 0, upgrade_none },

  { "40369a7bda66463c5785d160819ab6398b9d44f4",
    migrate_to_vars, 0, upgrade_none },

  { "e372b508bea9b991816d1c74680f7ae10d2a6d94",
    migrate_add_indexes, 0, upgrade_none },

  { "1509fd75019aebef5ac3da3a5edf1312393b70e9",
    0, migrate_to_external_privkeys, upgrade_none },

  { "bd86f9a90b5d552f0be1fa9aee847ea0f317778b",
    migrate_add_rosters, 0, upgrade_rosterify },

  { "1db80c7cee8fa966913db1a463ed50bf1b0e5b0e",
    migrate_files_BLOB, 0, upgrade_none },

  { "9d2b5d7b86df00c30ac34fe87a3c20f1195bb2df",
    migrate_rosters_no_hash, 0, upgrade_regen_caches },

  { "ae196843d368d042f475e3dadfed11e9d7f9f01e",
    migrate_add_heights, 0, upgrade_regen_caches },

  { "48fd5d84f1e5a949ca093e87e5ac558da6e5956d",
    0, migrate_add_ccode, upgrade_none },

  { "fe48b0804e0048b87b4cea51b3ab338ba187bdc2",
    migrate_add_heights_index, 0, upgrade_none },

  // The last entry in this table should always be the current
  // schema ID, with 0 for the migrators.
  { "7ca81b45279403419581d7fde31ed888a80bd34e", 0, 0, upgrade_none }
};
const size_t n_migration_events = (sizeof migration_events
                                   / sizeof migration_events[0]);

// unfortunately, this has to be aware of the migration_events array and
// its limits, lest we crash trying to print the garbage on either side.
static void
dump(struct migration_event const * const & mref, string & out)
{
  struct migration_event const * m = mref;
  ptrdiff_t i = m - migration_events;
  if (m == 0)
    out = "invalid migration event (null pointer)";
  else if (i < 0 || static_cast<size_t>(i) >= n_migration_events)
    out = (FL("invalid migration event, index %ld/%lu")
           % i % n_migration_events).str();
  else
    {
      char const * type;
      if (m->migrator_sql)
        type = "SQL only";
      else if (m->migrator_func)
        type = "codeful";
      else
        type = "none (current)";

      string regime;
      dump(m->regime, regime);

      out = (FL("migration %ld/%lu: %s, %s, from %s")
             % i % n_migration_events % type % regime % m->id).str();
    }
}

// The next several functions are concerned with calculating the schema
// hash and determining whether a database is usable (with or without
// migration).

static void
calculate_schema_id(sqlite3 * db, string & ident)
{
  sql stmt(db, 1,
           "SELECT sql FROM sqlite_master "
           "WHERE (type = 'table' OR type = 'index') "
           // filter out NULL statements, because
           // those are auto-generated indices (for
           // UNIQUE constraints, etc.).
           "AND sql IS NOT NULL "
           "AND name not like 'sqlite_stat%' "
           "ORDER BY name");

  string schema;
  using boost::char_separator;
  typedef boost::tokenizer<char_separator<char> > tokenizer;
  char_separator<char> sep(" \r\n\t", "(),;");

  while (stmt.step())
    {
      string table_schema(stmt.column_string(0));
      tokenizer tokens(table_schema, sep);
      for (tokenizer::iterator i = tokens.begin(); i != tokens.end(); i++)
        {
          if (schema.size() != 0)
            schema += " ";
          schema += *i;
        }
    }

  u32 code = sql::value(db, "PRAGMA user_version");
  if (code != 0)
    {
      schema += " PRAGMA user_version = ";
      schema += boost::lexical_cast<string>(code);
    }

  hexenc<id> tid;
  calculate_ident(data(schema), tid);
  ident = tid();
}

// Look through the migration_events table and return a pointer to the
// entry corresponding to database DB, or null if it isn't there (i.e. if
// the database schema is not one we know).
static migration_event const *
find_migration(sqlite3 * db)
{
  string id;
  calculate_schema_id(db, id);

  for (migration_event const * m = migration_events + n_migration_events - 1;
       m >= migration_events; m--)
    if (m->id == id)
      return m;

  return 0;
}

// This enumerates the possible mismatches between the monotone executable
// and its database.
enum schema_mismatch_case
  {
    SCHEMA_MATCHES = 0,
    SCHEMA_MIGRATION_NEEDED,
    SCHEMA_TOO_NEW,
    SCHEMA_NOT_MONOTONE,
    SCHEMA_EMPTY
  };

static void
dump(schema_mismatch_case const & cat, std::string & out)
{
  switch (cat)
    {
    case SCHEMA_MATCHES: out = "SCHEMA_MATCHES"; break;
    case SCHEMA_MIGRATION_NEEDED: out = "SCHEMA_MIGRATION_NEEDED"; break;
    case SCHEMA_TOO_NEW: out = "SCHEMA_TOO_NEW"; break;
    case SCHEMA_NOT_MONOTONE: out = "SCHEMA_NOT_MONOTONE"; break;
    case SCHEMA_EMPTY: out = "SCHEMA_EMPTY"; break;
    default: out = (FL("schema_mismatch_case(%d)") % cat).str(); break;
    }
}

static schema_mismatch_case
classify_schema(sqlite3 * db, migration_event const * m = 0)
{
  if (!m)
    m = find_migration(db);

  if (m)
    {
      if (m->migrator_sql || m->migrator_func)
        return SCHEMA_MIGRATION_NEEDED;
      else
        return SCHEMA_MATCHES;
    }
  else
    {
      // Distinguish an utterly empty database, such as is created by
      // "mtn db load < /dev/null", or by the sqlite3 command line utility
      // if you don't give it anything to do.
      if (sql::value(db, "SELECT COUNT(*) FROM sqlite_master") == 0)
        return SCHEMA_EMPTY;

      // monotone started setting this value in database headers only with
      // version 0.33, but all previous versions' databases are recognized
      // by their schema hashes.
      u32 code = sql::value(db, "PRAGMA user_version");
      if (code != mtn_creator_code)
        return SCHEMA_NOT_MONOTONE;

      return SCHEMA_TOO_NEW;
    }
}

string
describe_sql_schema(sqlite3 * db)
{
  I(db != NULL);
  string hash;
  calculate_schema_id(db, hash);

  switch (classify_schema(db))
    {
    case SCHEMA_MATCHES:
      return (F("%s (usable)") % hash).str();
    case SCHEMA_MIGRATION_NEEDED:
      return (F("%s (migration needed)") % hash).str();
    case SCHEMA_TOO_NEW:
      return (F("%s (too new, cannot use)") % hash).str();
    case SCHEMA_NOT_MONOTONE:
      return (F("%s (not a monotone database)") % hash).str();
    case SCHEMA_EMPTY:
      return (F("%s (database has no tables!)") % hash).str();
    default:
      I(false);
    }
}

// Provide sensible diagnostics for a database schema whose hash we do not
// recognize.  (Shared between check_sql_schema and migrate_sql_schema.)
static void
diagnose_unrecognized_schema(schema_mismatch_case cat,
                             system_path const & filename)
{
  N(cat != SCHEMA_EMPTY,
    F("cannot use the empty sqlite database %s\n"
      "(monotone databases must be created with '%s db init')")
    % filename % ui.prog_name);

  N(cat != SCHEMA_NOT_MONOTONE,
    F("%s does not appear to be a monotone database\n")
    % filename);

  N(cat != SCHEMA_TOO_NEW,
    F("%s appears to be a monotone database, but this version of\n"
      "monotone does not recognize its schema.\n"
      "you probably need a newer version of monotone.")
    % filename);
}

// check_sql_schema is called by database.cc on open, to determine whether
// the schema is up to date.  If it returns at all, the schema is indeed
// up to date (otherwise it throws a diagnostic).
void
check_sql_schema(sqlite3 * db, system_path const & filename)
{
  I(db != NULL);

  schema_mismatch_case cat = classify_schema(db);

  diagnose_unrecognized_schema(cat, filename);

  N(cat != SCHEMA_MIGRATION_NEEDED,
    F("database %s is laid out according to an old schema\n"
      "try '%s db migrate' to upgrade\n"
      "(this is irreversible; you may want to make a backup copy first)")
    % filename % ui.prog_name);
}

void
migrate_sql_schema(sqlite3 * db, app_state & app)
{
  I(db != NULL);

  upgrade_regime regime = upgrade_none; MM(regime);

  // Take an exclusive lock on the database before we try to read anything
  // from it.  If we don't take this lock until the beginning of the
  // "migrating data" phase, two simultaneous "db migrate" processes could
  // race through the "calculating migration" phase; then one of them
  // would wait for the other to finish all the migration steps, and trip
  // over the invariant check inside the for loop.
  {
    transaction guard(db);

    P(F("calculating migration..."));

    migration_event const * m; MM(m);
    schema_mismatch_case cat; MM(cat);
    m = find_migration(db);
    cat = classify_schema(db, m);

    diagnose_unrecognized_schema(cat, app.db.get_filename());

    // We really want 'db migrate' on an up-to-date schema to be a no-op
    // (no vacuum or anything, even), so that automated scripts can fire
    // one off optimistically and not have to worry about getting their
    // administrators to do it by hand.
    if (cat == SCHEMA_MATCHES)
      {
        P(F("no migration performed; database schema already up-to-date"));
        return;
      }

    sql::create_function(db, "sha1", sqlite_sha1_fn);
    sql::create_function(db, "unbase64", sqlite3_unbase64_fn);

    P(F("migrating data..."));

    for (;;)
      {
        // confirm that we are where we ought to be
        string id; MM(id);
        calculate_schema_id(db, id);
        I(id == m->id);
        I(!m->migrator_sql || !m->migrator_func);

        if (m->migrator_sql)
          sql::exec(db, m->migrator_sql);
        else if (m->migrator_func)
          m->migrator_func(db, app);
        else
          break;

        regime = std::min(regime, m->regime);

        m++;
        I(m < migration_events + n_migration_events);
        P(F("migrated to schema %s") % m->id);
      }

    P(F("committing changes to database"));
    guard.commit();
  }

  P(F("optimizing database"));
  sql::exec(db, "VACUUM");

  switch (regime)
    {
    case upgrade_changesetify:
    case upgrade_rosterify:
      {
        string command_str = (regime == upgrade_changesetify
                              ? "changesetify" : "rosterify");
        P(F("NOTE: because this database was last used by a rather old "
            "version\n"
            "of monotone, you're not done yet.  If you're a project "
            "leader, then\n"
            "see the file UPGRADE for instructions on running '%s db %s'")
          % ui.prog_name % command_str);
      }
      break;
    case upgrade_regen_caches:
      P(F("NOTE: this upgrade cleared monotone's caches\n"
          "you should now run '%s db regenerate_caches'")
        % ui.prog_name);
      break;
    case upgrade_none:
      break;
    }
}

// test_migration_step runs the migration step from SCHEMA to its
// successor, *without* validating that the database actually conforms to
// that schema first.  the point of this is to test error recovery from
// conditions that are not accessible through normal malformed dumps
// (because the schema conformance check will reject them).
void
test_migration_step(sqlite3 * db, app_state & app, string const & schema)
{
  I(db != NULL);
  sql::create_function(db, "sha1", sqlite_sha1_fn);
  sql::create_function(db, "unbase64", sqlite3_unbase64_fn);

  transaction guard(db);

  migration_event const * m;
  for (m = migration_events + n_migration_events - 1;
       m >= migration_events; m--)
    if (schema == m->id)
      break;

  N(m >= migration_events,
    F("cannot test migration from unknown schema %s") % schema);

  N(m->migrator_sql || m->migrator_func,
    F("schema %s is up to date") % schema);

  L(FL("testing migration from %s to %s\n in database %s")
    % schema % m[1].id % app.db.get_filename());

  if (m->migrator_sql)
    sql::exec(db, m->migrator_sql);
  else
    m->migrator_func(db, app);

  // in the unlikely event that we get here ...
  P(F("successful migration to schema %s") % m[1].id);
  guard.commit();
}

// Local Variables:
// mode: C++
// fill-column: 76
// c-file-style: "gnu"
// indent-tabs-mode: nil
// End:
// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s: