diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..3dfb5dd --- /dev/null +++ b/Gemfile @@ -0,0 +1,4 @@ +source 'https://rubygems.org' + +# Specify your gem's dependencies in mysql_dump_split.gemspec +gemspec diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..6d942ab --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,17 @@ +PATH + remote: . + specs: + mysql_dump_split (0.0.1) + +GEM + remote: https://rubygems.org/ + specs: + rake (10.3.2) + +PLATFORMS + ruby + +DEPENDENCIES + bundler (>= 1.6) + mysql_dump_split! + rake (~> 10.0) diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..76a413f --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,22 @@ +Copyright (c) 2014 R.I. Pienaar + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.markdown b/README.markdown index 1d0a4d6..08aa944 100644 --- a/README.markdown +++ b/README.markdown @@ -4,13 +4,19 @@ What is it? 
A simple script that splits a MySQL dump into lots of smaller files. It works both with data definitions and data only dumps. +Install: + +```shell +gem install mysql_dump_split +``` + Usage: ------ First you need a mysqldump file, put it into the directory you want all the split files in: -
+```shell
$ ruby split-mysql-dump.rb db.sql
Found a new db: app
Found a new table: administrator_log
@@ -18,34 +24,34 @@ Found a new table: administrator_log
Found a new table: auth_strings
writing line: 239 205.482MB in 6 seconds 34.247MB/sec
-
+```
Alternatively, you can pipe the dump in via STDIN using '-s'. Great
for working with large gzipped backups:
-+```shell $ gunzip -c db.sql.gz | ruby split-mysql-dump.rb -s -+``` -You can also limit the dump to particular tables using '-t' -or exclude tables using '-i'. +You can also limit the dump to particular tables using '-t' +or exclude tables using '-i'. -
+```shell $ ruby split-mysql-dump.rb -t auth_strings,administrator_log db.sql -+``` and -
+```shell $ ruby split-mysql-dump.rb -i auth_strings -+``` When you're done you should have lots of files like this: -
+```
 -rw-r--r-- 1 rip rip 210233252 May 17 18:06 administrator_log.sql
 -rw-r--r-- 1 rip rip 215463582 May 17 18:06 auth_strings.sql
-
+```
 The first bit of the files will be the database that the tables are in based on the _USE_ statements in the dump.
diff --git a/Rakefile b/Rakefile
new file mode 100644
index 0000000..809eb56
--- /dev/null
+++ b/Rakefile
@@ -0,0 +1,2 @@
+require "bundler/gem_tasks"
+
diff --git a/bin/mysql_dump_split b/bin/mysql_dump_split
new file mode 100755
index 0000000..b624509
--- /dev/null
+++ b/bin/mysql_dump_split
@@ -0,0 +1,50 @@
+#!/usr/bin/env ruby
+#
+# Command-line front end for MysqlDumpSplit: parses the options, picks the
+# dump source (a FILE argument or stdin with -s) and runs the split.
+
+require 'optparse'
+
+require 'mysql_dump_split'
+
+dump_split = MysqlDumpSplit.new
+
+OptionParser.new do |opts|
+  opts.banner = "Usage: mysql_dump_split [options] [FILE]"
+
+  opts.on("-s", "Read from stdin") do
+    dump_split.dumpfile = $stdin
+  end
+
+  opts.on("-t", '--tables TABLES', Array, "Extract only these tables") do |t|
+    dump_split.tables = t
+  end
+
+  opts.on("-i", '--ignore-tables TABLES', Array, "Ignore these tables") do |i|
+    dump_split.ignore = i
+  end
+
+  opts.on("-u", '--use-database NAME', String, "Assume NAME as database name") do |n|
+    dump_split.use_database = n
+  end
+
+  opts.on_tail("-h", "--help") do
+    puts opts
+    exit
+  end
+
+end.parse!
+
+# Fall back to the first positional argument unless -s already chose stdin.
+dump_split.dumpfile ||= ARGV.shift
+
+unless dump_split.dumpfile
+  puts "Nothing to do"
+  exit
+end
+
+# Flush every write so the "\r"-rewritten status line shows up immediately.
+STDOUT.sync = true
+
+
+dump_split.split!
diff --git a/lib/mysql_dump_split.rb b/lib/mysql_dump_split.rb
new file mode 100644
index 0000000..1831c83
--- /dev/null
+++ b/lib/mysql_dump_split.rb
@@ -0,0 +1,196 @@
+require 'fileutils'
+
+require 'mysql_dump_split/version'
+
+# Adds a human readable byte-size formatter to every number.
+class Numeric
+  # Formats the receiver, taken as a byte count, with a 1024-based unit
+  # suffix and at most three decimals, e.g. 1536.bytes_to_human is "1.5KB".
+  #
+  # @return [String] scaled value followed by B, KB, MB, GB or TB
+  def bytes_to_human
+    units = %w{B KB MB GB TB}
+    e = self > 0 ? (Math.log(self)/Math.log(1024)).floor : 0
+    # Values below one byte give a negative exponent and values of 1024 TB
+    # or more run past the unit table, so clamp to the known units.
+    e = [[e, 0].max, units.size - 1].min
+    s = "%.3f" % (to_f / 1024**e)
+    s.sub(/\.?0*$/, units[e])
+  end
+end
+
+# Splits a mysqldump stream into one file per table, grouped into a directory
+# per database, and prints progress to stdout while it works.
+class MysqlDumpSplit
+  attr_accessor :tables, :ignore, :use_database
+  attr_reader :dumpfile
+
+  # Dump comment lines that announce a table; the capture is the table name.
+  NEW_TABLE = [/\A-- Table structure for table .(.+)./,
+               /\A-- Dumping data for table .(.+)./,
+               /\A# Dump of table.(.+)/]
+  # Dump comment line that announces a database; the capture is its name.
+  NEW_DB = [/\A-- Current Database: .(.+)./]
+  # Dump comment line that opens the replication position section.
+  NEW_POSITION = [/\A-- Position to start replication or point-in-time recovery from/]
+
+  def initialize
+    @tables = []
+    @ignore = []
+    @use_database = nil
+    @dumpfile = nil
+    # Refresh the status line once every this many written lines.
+    @print_each = 10
+  end
+
+  # Sets the dump source.
+  #
+  # @param dumpfile [String, IO, nil] path of the dump, or $stdin; nil leaves
+  #   the splitter without a source
+  def dumpfile=(dumpfile)
+    @dumpfile = open(dumpfile)
+  end
+
+  # Reads the dump line by line and routes each line to the output file of
+  # the section (database, table or replication data) it belongs to. Does
+  # nothing when no dump source has been set.
+  def split!
+    return unless @dumpfile
+    @current_db = @use_database
+
+    @line_count = @table_count = 0
+    # Start the clock now so files opened before the first table
+    # (create.sql, 1replication.sql) report a sensible elapsed time.
+    @start_time = Time.now
+
+    @current_table = @outfile = nil
+
+    while (line = @dumpfile.gets) do
+
+      case line
+      when *NEW_TABLE
+        is_new_table = (@current_table != $1)
+        table = $1
+
+        new_table(table) if is_new_table
+
+      when *NEW_DB
+        @current_db = $1
+        @current_table = nil
+
+        close_outfile
+
+        # mkdir_p tolerates directories left over from an earlier run.
+        FileUtils.mkdir_p("#{@current_db}/tables")
+        self.outfile = "#{@current_db}/create.sql"
+        puts("\n\nFound a new db: #{@current_db}")
+
+      when *NEW_POSITION
+        @current_db = nil
+        @current_table = nil
+
+        close_outfile
+
+        self.outfile = '1replication.sql'
+        puts("\n\nFound replication data")
+
+      else
+        write(line)
+      end
+    end
+
+    puts
+  ensure
+    # The last output file would otherwise stay open after the dump ends.
+    close_outfile
+  end
+
+  # Opens +name+ for writing and makes it the current output file.
+  def outfile=(name)
+    @outfile = File.new(name, 'w')
+  end
+
+  # Appends +line+ to the current output file and refreshes the status line
+  # every @print_each lines. The line is dropped when no output file is open.
+  def write(line)
+    return unless @outfile
+    return if @outfile.closed?
+
+    @outfile.syswrite(line)
+    @line_count += 1
+
+    if @line_count % @print_each == 0
+      print_status
+    end
+  end
+
+  # Prints line count, output size and throughput, ending in "\r" so the next
+  # status overwrites this one.
+  def print_status
+    # The +1 keeps the divisor non-zero during the first second.
+    elapsed = Time.now.to_i - @start_time.to_i + 1
+    print(" writing line: #{@line_count} #{@outfile.stat.size.bytes_to_human} in #{elapsed} seconds #{(@outfile.stat.size / elapsed).bytes_to_human}/sec \r")
+  end
+
+  # Closes the current output file, if there is one, and forgets it.
+  def close_outfile
+    @outfile.close if @outfile && !@outfile.closed?
+    @outfile = nil
+  end
+
+  # @return [Boolean] true when no table list was given or +table+ is on it
+  def included?(table)
+    @tables.empty? || @tables.include?(table)
+  end
+
+  # @return [Boolean] true when +table+ is on the ignore list
+  def ignored?(table)
+    @ignore.include?(table)
+  end
+
+  # Switches output to the file for +table+, unless the include or ignore
+  # lists filter the table out, in which case its lines are dropped.
+  def new_table(table)
+    close_outfile
+
+    puts("\n\nFound a new table: #{table}")
+
+    if !included?(table)
+      puts "`#{table}` not in list, ignoring"
+    elsif ignored?(table)
+      puts "`#{table}` will be ignored"
+    else
+      @start_time = Time.now
+      @line_count = 0
+      @table_count += 1
+      path = tables_path
+      # mkdir_p also creates the database directory when the name came from
+      # use_database and no "Current Database" line has created it yet.
+      FileUtils.mkdir_p(path)
+      self.outfile = "#{path}/#{table}.sql"
+
+      if @current_db
+        @outfile.syswrite("USE `#{@current_db}`;\n\n")
+      end
+    end
+
+    @current_table = table
+  end
+
+  # @return [String] directory that receives the per-table files
+  def tables_path
+    [@current_db, 'tables'].compact.join('/')
+  end
+
+  private
+
+  # Turns the dumpfile= argument into a readable IO: $stdin is used as is, a
+  # path is opened in binary mode, and nil stays nil.
+  def open(dumpfile)
+    if dumpfile == $stdin
+      $stdin
+    elsif dumpfile
+      File.new(dumpfile, "r:binary")
+    end
+  end
+end
diff --git a/lib/mysql_dump_split/version.rb b/lib/mysql_dump_split/version.rb
new file mode 100644
index 0000000..72a042d
--- /dev/null
+++ b/lib/mysql_dump_split/version.rb
@@ -0,0 +1,3 @@
+class MysqlDumpSplit
+  VERSION = "0.0.1"
+end
diff --git a/mysql_dump_split.gemspec b/mysql_dump_split.gemspec
new file mode 100644
index 0000000..f1c305c
--- /dev/null
+++ b/mysql_dump_split.gemspec
@@ -0,0 +1,23 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'mysql_dump_split/version'
+
+Gem::Specification.new do |spec|
+  spec.name = 'mysql_dump_split'
+  spec.version = MysqlDumpSplit::VERSION
+  spec.authors = ['R.I. Pienaar']
+  spec.email = ['rip@devco.net']
+  spec.summary = %q{Splits a MySQL dump into lots of smaller files.}
+  spec.description = %q{It works both with data definitions and data only dumps.}
+  spec.homepage = 'https://github.com/ripienaar/mysql-dump-split'
+  spec.license = 'MIT'
+
+  spec.files = `git ls-files -z`.split("\x0")
+  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
+  spec.require_paths = ['lib']
+
+  spec.add_development_dependency 'bundler', '>= 1.6'
+  spec.add_development_dependency 'rake', '~> 10.0'
+end