From 69670e35376d20709e694466190d6c866f932074 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Thu, 29 Aug 2024 17:18:46 -0400 Subject: [PATCH] all info directives are now supported --- cli/src/args/info.rs | 17 ++- cli/src/compress.rs | 1 + cli/src/extract.rs | 4 +- cli/src/extract/matcher.rs | 4 +- cli/src/extract/receiver.rs | 44 +++++- cli/src/info/directives.rs | 187 ++++++++++++++++++++++--- cli/src/info/formats.rs | 82 ++++++++++- src/extra_fields/extended_timestamp.rs | 2 +- 8 files changed, 302 insertions(+), 39 deletions(-) diff --git a/cli/src/args/info.rs b/cli/src/args/info.rs index 780b696df..f571829c5 100644 --- a/cli/src/args/info.rs +++ b/cli/src/args/info.rs @@ -247,7 +247,6 @@ impl UnixModeFormat { #[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum TimestampFormat { - UnixEpochMilliseconds, DateOnly, TimeOnly, #[default] @@ -258,7 +257,6 @@ impl TimestampFormat { pub fn parse(s: &str) -> Result { match s { "" => Ok(Self::default()), - ":epoch" => Ok(Self::UnixEpochMilliseconds), ":date" => Ok(Self::DateOnly), ":time" => Ok(Self::TimeOnly), ":date-time" => Ok(Self::DateAndTime), @@ -337,6 +335,7 @@ pub enum EntryFormatDirective { LocalHeaderStart(OffsetFormat), ContentStart(OffsetFormat), ContentEnd(OffsetFormat), + CentralHeaderStart(OffsetFormat), CompressedSize(ByteSizeFormat), UncompressedSize(ByteSizeFormat), UnixMode(UnixModeFormat), @@ -374,6 +373,11 @@ impl ParseableDirective for EntryFormatDirective { .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; Ok(Self::ContentEnd(offset_fmt)) } + s if s.starts_with("central-header-start") => { + let offset_fmt = OffsetFormat::parse(&s["central-header-start".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CentralHeaderStart(offset_fmt)) + } s if s.starts_with("compressed-size") => { let size_fmt = ByteSizeFormat::parse(&s["compressed-size".len()..]) .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; @@ -563,6 +567,10 @@ all the output to a single line. The offset of the end of the entry's possibly-compressed content. The next entry's local header begins immediately after. +%central-header-start% + The offset of the entry's central directory header, at the end of the + zip file. + %compressed-size% The size of the entry's possibly-compressed content as stored in the archive. @@ -584,7 +592,7 @@ all the output to a single line. %timestamp% The timestamp for the entry. - Note that zip timestamps only have precision down to the minute. + Note that zip timestamps only have precision down to 2 seconds. ## Entry format directives: @@ -608,9 +616,8 @@ unix-mode = '' [DEFAULT => octal] = ':pretty' (`ls`-like permissions string) timestamp = '' [DEFAULT => date-time] - = ':epoch' (milliseconds since unix epoch as a decimal number) = ':date' (ISO 8601 string representation of date) - = ':time' (HH:MM string representation of time) + = ':time' (HH:MM:SS string representation of time) = ':date-time' (ISO 8601 date then HH:MM time joined by a space) diff --git a/cli/src/compress.rs b/cli/src/compress.rs index 784f835b1..e35058273 100644 --- a/cli/src/compress.rs +++ b/cli/src/compress.rs @@ -418,6 +418,7 @@ pub fn execute_compress(mut err: impl Write, args: Compress) -> Result<(), Comma "name {last_name} remaining after all entry flags processed" ))); } + for pos_arg in positional_paths.into_iter() { let file_type = fs::symlink_metadata(&pos_arg) .wrap_err_with(|| format!("failed to read metadata from path {}", pos_arg.display()))? diff --git a/cli/src/extract.rs b/cli/src/extract.rs index 69efe3deb..f5aaa28c7 100644 --- a/cli/src/extract.rs +++ b/cli/src/extract.rs @@ -33,7 +33,7 @@ fn maybe_process_symlink<'a, 't>( * contents with io::Read. ZipEntry<'a, R> from * https://github.com/zip-rs/zip2/pull/233 avoids this issue!!! */ let data = EntryData::from_entry(&entry); - (data.kind, data.size) + (data.kind, data.uncompressed_size) }; if !matches!(kind, EntryKind::Symlink) { return Ok(None); @@ -86,7 +86,7 @@ where deduped_matching_extracts .into_iter() .flat_map(|(recv, names)| names.into_iter().map(move |n| (recv, n))) - .map(|(recv, name)| recv.generate_entry_handle(data, symlink_target.as_deref(), name)) + .map(|(recv, name)| recv.generate_entry_handle(&data, symlink_target.as_deref(), name)) .collect::, _>>()? .into_iter() .flatten(), diff --git a/cli/src/extract/matcher.rs b/cli/src/extract/matcher.rs index af382369d..9e3eb463f 100644 --- a/cli/src/extract/matcher.rs +++ b/cli/src/extract/matcher.rs @@ -391,8 +391,8 @@ impl EntryMatcher for Size { fn matches(&self, entry: &EntryData) -> bool { match self { - Self::Max(max) => entry.size <= *max, - Self::Min(min) => entry.size >= *min, + Self::Max(max) => entry.uncompressed_size <= *max, + Self::Min(min) => entry.uncompressed_size >= *min, } } } diff --git a/cli/src/extract/receiver.rs b/cli/src/extract/receiver.rs index 6b106dbd2..6495ccd60 100644 --- a/cli/src/extract/receiver.rs +++ b/cli/src/extract/receiver.rs @@ -8,7 +8,11 @@ use std::{ rc::Rc, }; -use zip::{read::ZipFile, CompressionMethod}; +use zip::{ + extra_fields::{ExtendedTimestamp, ExtraField}, + read::ZipFile, + CompressionMethod, DateTime, +}; use super::matcher::{CompiledMatcher, EntryMatcher}; use super::transform::{CompiledTransformer, NameTransformer}; @@ -21,13 +25,21 @@ pub enum EntryKind { Symlink, } -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct EntryData<'a> { pub name: &'a str, pub kind: EntryKind, pub compression: CompressionMethod, pub unix_mode: Option, - pub size: u64, + pub comment: &'a str, + pub uncompressed_size: u64, + pub compressed_size: u64, + pub local_header_start: u64, + pub content_start: u64, + pub central_header_start: u64, + pub crc32: u32, + pub last_modified_time: Option, + pub extended_timestamp: Option, } impl<'a> EntryData<'a> { @@ -44,9 +56,27 @@ impl<'a> EntryData<'a> { }, compression: entry.compression(), unix_mode: entry.unix_mode(), - size: entry.size(), + comment: entry.comment(), + uncompressed_size: entry.size(), + compressed_size: entry.compressed_size(), + local_header_start: entry.header_start(), + content_start: entry.data_start(), + central_header_start: entry.central_header_start(), + crc32: entry.crc32(), + last_modified_time: entry.last_modified(), + extended_timestamp: entry + .extra_data_fields() + .find_map(|f| match f { + ExtraField::ExtendedTimestamp(ts) => Some(ts), + }) + .cloned(), } } + + #[inline(always)] + pub const fn content_end(&self) -> u64 { + self.content_start + self.compressed_size + } } pub struct ConcatEntry<'w> { @@ -136,7 +166,7 @@ pub enum MatchingEntrySpec<'a, 'c, 'w> { impl<'a, 'c, 'w> MatchingEntrySpec<'a, 'c, 'w> { /* Split output handles for concat, and split generated handles by extract source and - * name. use ptr::eq() to split, and Cow::<'s, str>::eq() with str AsRef. */ + * name. use Rc::ptr_eq() to split, and Cow::<'s, str>::eq() with str AsRef. */ pub fn is_nested_duplicate( self, deduped_concat_writers: &mut Vec<&'c Rc>>, @@ -177,7 +207,7 @@ impl<'a, 'c, 'w> MatchingEntrySpec<'a, 'c, 'w> { pub trait EntryReceiver: fmt::Debug { fn generate_entry_handle<'s>( &self, - data: EntryData<'s>, + data: &EntryData<'s>, symlink_target: Option<&[u8]>, name: Cow<'s, str>, ) -> Result>, CommandError>; @@ -274,7 +304,7 @@ where { fn generate_entry_handle<'s>( &self, - data: EntryData<'s>, + data: &EntryData<'s>, symlink_target: Option<&[u8]>, name: Cow<'s, str>, ) -> Result>, CommandError> { diff --git a/cli/src/info/directives.rs b/cli/src/info/directives.rs index 785227bc6..e4e3e5bfd 100644 --- a/cli/src/info/directives.rs +++ b/cli/src/info/directives.rs @@ -181,8 +181,8 @@ pub mod compiled { pub mod entry { use super::{ super::formats::{ - ByteSizeValue, CompressionMethodValue, FileTypeValue, FormatValue, NameString, - UnixModeValue, + BinaryNumericValue, BinaryStringValue, ByteSizeValue, CompressionMethodValue, + FileTypeValue, FormatValue, NameString, OffsetValue, TimestampValue, UnixModeValue, }, FormatDirective, }; @@ -220,6 +220,118 @@ pub mod entry { } } + pub struct EntryCommentField(pub BinaryStringValue); + + impl FormatDirective for EntryCommentField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = BinaryStringValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.comment.as_bytes()) + } + fn value_formatter(&self) -> BinaryStringValue { + self.0 + } + } + + pub struct LocalHeaderStartField(pub OffsetValue); + + impl FormatDirective for LocalHeaderStartField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.local_header_start) + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct ContentStartField(pub OffsetValue); + + impl FormatDirective for ContentStartField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.content_start) + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct UncompressedSizeField(pub ByteSizeValue); + + impl FormatDirective for UncompressedSizeField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = ByteSizeValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.uncompressed_size + } + fn value_formatter(&self) -> ByteSizeValue { + self.0 + } + } + + pub struct CompressedSizeField(pub ByteSizeValue); + + impl FormatDirective for CompressedSizeField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = ByteSizeValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.compressed_size + } + fn value_formatter(&self) -> ByteSizeValue { + self.0 + } + } + + pub struct ContentEndField(pub OffsetValue); + + impl FormatDirective for ContentEndField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.content_end()) + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct CentralHeaderStartField(pub OffsetValue); + + impl FormatDirective for CentralHeaderStartField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.central_header_start) + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + pub struct CompressionMethodField(pub CompressionMethodValue); impl FormatDirective for CompressionMethodField { @@ -252,18 +364,34 @@ pub mod entry { } } - pub struct UncompressedSizeField(pub ByteSizeValue); + pub struct Crc32Field(pub BinaryNumericValue); - impl FormatDirective for UncompressedSizeField { + impl FormatDirective for Crc32Field { type Data<'a> = &'a EntryData<'a>; - type FieldType = ByteSizeValue; + type FieldType = BinaryNumericValue; fn extract_field<'a>( &self, data: Self::Data<'a>, ) -> ::Input<'a> { - data.size + data.crc32 } - fn value_formatter(&self) -> ByteSizeValue { + fn value_formatter(&self) -> BinaryNumericValue { + self.0 + } + } + + pub struct TimestampField(pub TimestampValue); + + impl FormatDirective for TimestampField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = TimestampValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.last_modified_time + } + fn value_formatter(&self) -> TimestampValue { self.0 } } @@ -326,6 +454,9 @@ pub mod entry { Ok(CompiledEntryDirective(match spec { EntryFormatDirective::Name => Box::new(EntryNameField(NameString)), EntryFormatDirective::FileType(f) => Box::new(FileTypeField(FileTypeValue(f))), + EntryFormatDirective::CompressedSize(f) => { + Box::new(CompressedSizeField(ByteSizeValue(f))) + } EntryFormatDirective::UncompressedSize(f) => { Box::new(UncompressedSizeField(ByteSizeValue(f))) } @@ -333,7 +464,27 @@ pub mod entry { EntryFormatDirective::CompressionMethod(f) => { Box::new(CompressionMethodField(CompressionMethodValue(f))) } - _ => todo!(), + EntryFormatDirective::Comment(f) => { + Box::new(EntryCommentField(BinaryStringValue(f))) + } + EntryFormatDirective::LocalHeaderStart(f) => { + Box::new(LocalHeaderStartField(OffsetValue(f))) + } + EntryFormatDirective::ContentStart(f) => { + Box::new(ContentStartField(OffsetValue(f))) + } + EntryFormatDirective::ContentEnd(f) => { + Box::new(ContentEndField(OffsetValue(f))) + } + EntryFormatDirective::CentralHeaderStart(f) => { + Box::new(CentralHeaderStartField(OffsetValue(f))) + } + EntryFormatDirective::CrcValue(f) => { + Box::new(Crc32Field(BinaryNumericValue(f))) + } + EntryFormatDirective::Timestamp(f) => { + Box::new(TimestampField(TimestampValue(f))) + } })) } } @@ -354,7 +505,7 @@ pub mod archive { use std::path::Path; - #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ArchiveData<'a> { pub path: Option<&'a Path>, pub stream_length: u64, @@ -366,19 +517,13 @@ pub mod archive { impl<'a> ArchiveData<'a> { pub fn from_archive_with_path(zip: &'a ArchiveWithPath) -> Self { - let path = zip.path.as_path(); - let stream_length = zip.len; - let num_entries = zip.archive.len(); - let comment = zip.archive.comment(); - let first_entry_start = zip.archive.offset(); - let central_directory_start = zip.archive.central_directory_start(); Self { - path: Some(path), - stream_length, - num_entries, - comment: Some(comment), - first_entry_start: Some(first_entry_start), - central_directory_start: Some(central_directory_start), + path: Some(zip.path.as_path()), + stream_length: zip.len, + num_entries: zip.archive.len(), + comment: Some(zip.archive.comment()), + first_entry_start: Some(zip.archive.offset()), + central_directory_start: Some(zip.archive.central_directory_start()), } } } diff --git a/cli/src/info/formats.rs b/cli/src/info/formats.rs index 0506f33c8..a320fb122 100644 --- a/cli/src/info/formats.rs +++ b/cli/src/info/formats.rs @@ -5,7 +5,7 @@ use std::{ path, }; -use zip::CompressionMethod; +use zip::{CompressionMethod, DateTime}; use super::directives::Writeable; use crate::{args::info::*, extract::receiver::EntryKind}; @@ -289,6 +289,36 @@ impl FormatValue for OffsetValue { } } +#[derive(Copy, Clone)] +pub struct BinaryNumericValue(pub BinaryNumericValueFormat); + +#[derive(Debug)] +pub enum BinaryNumericValueWriter { + Decimal(u32), + Hexadecimal(u32), +} + +impl fmt::Display for BinaryNumericValueWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Decimal(x) => write!(f, "{}", x), + Self::Hexadecimal(x) => write!(f, "{:x}", x), + } + } +} + +impl FormatValue for BinaryNumericValue { + type Input<'a> = u32; + type Output<'a> = BinaryNumericValueWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + BinaryNumericValueFormat::Decimal => BinaryNumericValueWriter::Decimal(input), + BinaryNumericValueFormat::Hexadecimal => BinaryNumericValueWriter::Hexadecimal(input), + }) + } +} + #[derive(Copy, Clone)] pub struct BinaryStringValue(pub BinaryStringFormat); @@ -343,3 +373,53 @@ impl FormatValue for BinaryStringValue { }) } } + +#[derive(Copy, Clone)] +pub struct TimestampValue(pub TimestampFormat); + +#[derive(Debug)] +pub enum TimestampValueWriter { + None, + DateOnly(DateTime), + TimeOnly(DateTime), + DateAndTime(DateTime), +} + +impl fmt::Display for TimestampValueWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::None => write!(f, "?"), + Self::DateOnly(d) => write!(f, "{}-{}-{}", d.year(), d.month(), d.day()), + Self::TimeOnly(t) => write!(f, "{}:{}:{}", t.hour(), t.minute(), t.second()), + Self::DateAndTime(dt) => { + write!( + f, + "{}-{}-{} {}:{}:{}", + dt.year(), + dt.month(), + dt.day(), + dt.hour(), + dt.minute(), + dt.second() + ) + } + } + } +} + +impl FormatValue for TimestampValue { + type Input<'a> = Option; + type Output<'a> = TimestampValueWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + let input = match input { + None => return Ok(TimestampValueWriter::None), + Some(input) => input, + }; + Ok(match self.0 { + TimestampFormat::DateOnly => TimestampValueWriter::DateOnly(input), + TimestampFormat::TimeOnly => TimestampValueWriter::TimeOnly(input), + TimestampFormat::DateAndTime => TimestampValueWriter::DateAndTime(input), + }) + } +} diff --git a/src/extra_fields/extended_timestamp.rs b/src/extra_fields/extended_timestamp.rs index 1cc0f1de4..0cf794c3c 100644 --- a/src/extra_fields/extended_timestamp.rs +++ b/src/extra_fields/extended_timestamp.rs @@ -4,7 +4,7 @@ use std::io::Read; /// extended timestamp, as described in -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ExtendedTimestamp { mod_time: Option, ac_time: Option,