-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add option to preserve sequence number generators #34
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,6 +65,13 @@ struct Args { | |
/// and pg_parcel.toml. | ||
#[clap(long = "no-feature", value_delimiter = ',', display_order = 6)] | ||
skipped_features: Option<Vec<String>>, | ||
|
||
/// Retain the current value of sequence number generators. | ||
/// This will ensure that any tables that use sequences in any | ||
/// of their columns will continue generating values starting from | ||
/// the last known value | ||
#[clap(long, default_value_t = false, display_order = 7)] | ||
keep_sequences: bool, | ||
} | ||
|
||
/// Options here is a combination of command line arguments and contents of the slicefile. | ||
|
@@ -79,6 +86,7 @@ struct Options { | |
estimate_only: bool, | ||
truncate: bool, | ||
features: HashSet<String>, | ||
keep_sequences: bool, | ||
} | ||
|
||
impl Options { | ||
|
@@ -121,6 +129,7 @@ impl Options { | |
estimate_only: args.estimate_only, | ||
truncate: args.truncate, | ||
features, | ||
keep_sequences: args.keep_sequences, | ||
}; | ||
Ok(options) | ||
} | ||
|
@@ -265,19 +274,38 @@ fn main() -> Result<(), Box<dyn Error>> { | |
)?; | ||
} | ||
|
||
let mut stdout = std::io::stdout(); | ||
|
||
// Dump table data. | ||
for table in tables.iter() { | ||
let query = table.copy_out_query(&options); | ||
// let query = format!("{query} LIMIT 10"); // TESTING ONLY | ||
let copy_statement = format!("COPY ({}) TO stdout;", query); | ||
pb.set_message(table.name.to_owned()); | ||
{ | ||
let query = table.copy_out_query(&options); | ||
// let query = format!("{query} LIMIT 10"); // TESTING ONLY | ||
let copy_statement = format!("COPY ({}) TO stdout;", query); | ||
pb.set_message(table.name.to_owned()); | ||
|
||
writeln!(stdout, "{};", table.copy_in_query())?; | ||
let mut reader = client.copy_out(©_statement)?; | ||
let size = std::io::copy(&mut reader, &mut stdout)?; | ||
sizes.push((table.name.clone(), size)); | ||
writeln!(stdout, "\\.")?; | ||
} | ||
|
||
let mut stdout = std::io::stdout(); | ||
writeln!(stdout, "{};", table.copy_in_query())?; | ||
let mut reader = client.copy_out(©_statement)?; | ||
let size = std::io::copy(&mut reader, &mut stdout)?; | ||
sizes.push((table.name.clone(), size)); | ||
writeln!(stdout, "\\.")?; | ||
{ | ||
if options.keep_sequences { | ||
for sequence in table.sequences.iter() { | ||
let current_sequence_value = client | ||
.query_one(&table.current_sequence_value_query(sequence), &[])? | ||
.get::<usize, String>(0) | ||
.parse::<u64>()?; | ||
writeln!( | ||
stdout, | ||
"{};", | ||
table.restore_sequences_query(sequence, current_sequence_value) | ||
)?; | ||
} | ||
} | ||
} | ||
|
||
pb.inc(1); | ||
} | ||
|
@@ -314,6 +342,7 @@ struct Table { | |
schema: String, | ||
size: u64, // Bytes. | ||
rows: u64, // Estimate. | ||
sequences: Vec<Sequence>, | ||
} | ||
|
||
impl Table { | ||
|
@@ -324,6 +353,23 @@ impl Table { | |
self.name.sql_identifier() | ||
) | ||
} | ||
fn current_sequence_value_query(&self, sequence: &Sequence) -> String { | ||
format!( | ||
r#"SELECT COALESCE(MAX("{column_name}"), 1)::text FROM {schema}."{table_name}""#, | ||
schema = &self.schema, | ||
column_name = sequence.column_name, | ||
table_name = &self.name, | ||
) | ||
} | ||
fn restore_sequences_query(&self, sequence: &Sequence, current_sequence_value: u64) -> String { | ||
format!( | ||
r#"SELECT SETVAL('{schema}."{sequence_name}"', {current_sequence_value}) FROM {schema}."{table_name}""#, | ||
schema = &self.schema, | ||
sequence_name = sequence.sequence_name, | ||
current_sequence_value = current_sequence_value, | ||
table_name = &self.name, | ||
) | ||
} | ||
fn copy_out_query(&self, options: &Options) -> String { | ||
let column_values = options.column_values.iter().map(|s| s.sql_value()); | ||
let column_values = intersperse(column_values, ",".to_string()).collect::<String>(); | ||
|
@@ -378,6 +424,12 @@ struct Column { | |
pub is_nullable: bool, | ||
} | ||
|
||
#[derive(Debug, Clone)] | ||
struct Sequence { | ||
pub column_name: String, | ||
pub sequence_name: String, | ||
} | ||
|
||
fn get_tables(options: &Options) -> Result<Vec<Table>, Box<dyn Error>> { | ||
let mut client = pg_client(options)?; | ||
let query = format!( | ||
|
@@ -387,7 +439,8 @@ fn get_tables(options: &Options) -> Result<Vec<Table>, Box<dyn Error>> { | |
pg_total_relation_size(pg_class.oid)::text as table_size, | ||
max(pg_class.reltuples::int8)::text as table_rows, -- https://wiki.postgresql.org/wiki/Count_estimate | ||
array_agg(columns.column_name::text order by columns.ordinal_position) as column_names, | ||
array_agg(columns.is_nullable = 'YES' order by columns.ordinal_position) as column_nullables | ||
array_agg(columns.is_nullable = 'YES' order by columns.ordinal_position) as column_nullables, | ||
array_agg(pg_attribute.attname::text || '::' || seqs.relname::text) FILTER (WHERE seqs.relname IS NOT NULL) AS sequences | ||
from information_schema.tables | ||
join information_schema.columns on ( | ||
columns.table_catalog = tables.table_catalog | ||
|
@@ -400,6 +453,14 @@ fn get_tables(options: &Options) -> Result<Vec<Table>, Box<dyn Error>> { | |
join pg_class on ( | ||
pg_class.relnamespace = pg_namespace.oid | ||
and pg_class.relname = tables.table_name) | ||
join pg_attribute ON pg_attribute.attname = columns. "column_name" | ||
and pg_class.oid = pg_attribute.attrelid | ||
left join ( | ||
select sequences.*, pg_depend.refobjid tbl_id, pg_depend.refobjsubid col_id | ||
from pg_class sequences | ||
join pg_depend ON pg_depend.objid = sequences.oid | ||
where sequences.relkind = 'S' | ||
) seqs ON seqs.tbl_id = pg_class.oid AND seqs.col_id = pg_attribute.attnum | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. PostgreSQL sequences don't have to be associated with a column. They're all free-standing, but can be "owned" by a table, but I think that only means that they're dropped when the table is dropped (see I think sequences should be considered as a separate top-level object, and not grouped in with tables. They exist in the same namespace, so the same include/exclude rules can be applied to select which sequences to restore. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This kinda ties into why I didn't use For both figuring out what the current value of the sequence should be, and which sequences actually even get their current value preserved, I stuck to the table as the source of truth with the understanding that what i.e. what happens if we intentionally skip a table, but then don't explicitly also skip any sequences that the table depends on? If we always restore the sequence value, then the sequence value would be current but the table would be empty, which might be unexpected? Should you always have to explicitly skip both at the same time? But, that was an assumption I made, would be happy to change both of these with the understanding that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be fair to say that if we are dumping a table, we should implicitly also dump any associated sequences? I think that maybe the ability to dump sequences that aren't connected to any (requested) tables should be considered a separate feature. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So how do we feel about two options, one called Or, just one option called (conceptually; naming can be whatever) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From the README,
From the
Ergo: To elaborate, sequences are first-class entities within a schema, on a level with tables. They can be associated with a table but they don't belong to a table. A sequence's value is its data, just as rows are a table's data. I think it's conceptually sound to dump values for all sequences in the schema. In fact I think it's a bug or at least a missing feature that we didn't do that before, by default, and What are the consequences if we just do that? What are we trying to prevent by not doing it? |
||
where tables.table_schema = {schema} | ||
and tables.table_type = 'BASE TABLE' | ||
group by tables.table_name, pg_class.oid | ||
|
@@ -426,12 +487,29 @@ fn get_tables(options: &Options) -> Result<Vec<Table>, Box<dyn Error>> { | |
.zip(column_nullables) | ||
.map(|(name, is_nullable)| Column { name, is_nullable }) | ||
.collect(); | ||
let sequences: Vec<Sequence> = row | ||
.get::<&str, Option<Vec<String>>>("sequences") | ||
.unwrap_or(Vec::with_capacity(0)) | ||
JQuezada0 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
.iter() | ||
.map(|seq| -> Sequence { | ||
let mut parts = seq.split("::"); | ||
let column_name = parts.next().unwrap().to_string(); | ||
let sequence_name = parts.next().unwrap().to_string(); | ||
|
||
Sequence { | ||
column_name, | ||
sequence_name, | ||
} | ||
}) | ||
.collect(); | ||
|
||
Some(Table { | ||
name: table_name, | ||
columns, | ||
schema: options.schema.clone(), | ||
size: table_size, | ||
rows: table_rows, | ||
sequences, | ||
}) | ||
} | ||
}) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use the
SqlString
trait here to escape, plus a couple of other things:There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's possible to get a sequence's last value by querying it like a table. Also, we can use
sql_value
to construct the argument toSETVAL
.https://www.postgresql.org/docs/12/functions-sequence.html