Skip to content

Commit

Permalink
feat: Optional password, sort by format
Browse files Browse the repository at this point in the history
usage: ./hdfc-cc-parser-rs --dir /tmp/statements --password password --output output.csv --sortformat="%d-%m-%Y"
  • Loading branch information
joeirimpan committed Mar 15, 2023
1 parent 710f11e commit 2860110
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 13 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ regex = "1.7.1"
pdf_encoding = "0.3.0"
euclid = "0.22.6"
log = "*"
clap = "4.1.8"
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ This tool parse and extract information from HDFC Bank credit card statements in
* Clone this repository: `git clone https://github.com/joeirimpan/hdfc-cc-parser-rs.git`
* Navigate to the repository directory: `cd hdfc-cc-parser-rs`
* Build the project: `cargo build --release`
* Run the binary: `./target/release/hdfc-cc-parser-rs </path/to/statements> <password> <output.csv>`
* Run the binary: `./target/release/hdfc-cc-parser-rs --dir </path/to/statements> --password <optional password> --output <output.csv> --sortformat="optional format, e.g. %d-%m-%Y"`

## Why?

Expand Down
66 changes: 54 additions & 12 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ mod pdf_tools;

use anyhow::{Context, Error};
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use clap::{arg, Command};
use csv::Writer;
use pdf::content::*;
use pdf::file::File as pdfFile;
use pdf_tools::ops_with_text_state;
use regex::Regex;
use std::env::args;
use std::fs;
use std::fs::File;
use std::str::FromStr;
Expand Down Expand Up @@ -203,15 +203,39 @@ pub fn parse(path: String, _password: String) -> Result<Vec<Transaction>, Error>
Ok(members)
}

fn date_format_to_regex(date_format: &str) -> Regex {
let regex_str = date_format
.replace("%Y", r"\d{4}")
.replace("%m", r"\d{2}")
.replace("%d", r"\d{2}")
.replace("%H", r"\d{2}")
.replace("%M", r"\d{2}")
.replace("%S", r"\d{2}")
.replace("%z", r"[\+\-]\d{4}")
.replace("%Z", r"[A-Z]{3}");

Regex::new(&regex_str).unwrap()
}

fn main() -> Result<(), Error> {
let path = args().nth(1).expect("no dir given");
let _password = args().nth(2).expect("no password given");
let output = args().nth(3).expect("no output file given");
let matches = Command::new("HDFC credit card statement parser")
.arg(arg!(--dir <path_to_directory>).required(true))
.arg(arg!(--password <password>).required(false))
.arg(arg!(--output <output>).required(true))
.arg(
arg!(--sortformat <date_format>)
.required(false)
.default_value("%d-%m-%Y"),
)
.get_matches();

let entries = fs::read_dir(path).unwrap();
let path = matches.get_one::<String>("dir");
let _password = matches.get_one::<String>("password");
let output = matches.get_one::<String>("output").unwrap().to_string();

let entries = fs::read_dir(path.unwrap()).unwrap();

// Filter pdf files, sort the statement files based on dates in the file names.
let re = Regex::new(r"(\d{1,2}-\d{2}-\d{4})").unwrap();
let mut pdf_files: Vec<String> = entries
.filter_map(Result::ok)
.map(|entry| entry.path())
Expand All @@ -221,16 +245,34 @@ fn main() -> Result<(), Error> {
})
.map(|path| path.to_string_lossy().to_string())
.collect();
pdf_files.sort_by(|a, b| {
let a_date = NaiveDate::parse_from_str(&re.captures(a).unwrap()[1], "%d-%m-%Y").unwrap();
let b_date = NaiveDate::parse_from_str(&re.captures(b).unwrap()[1], "%d-%m-%Y").unwrap();
a_date.cmp(&b_date)
});

// Sort only if there is a date format specified
if let Some(sort_format) = matches.get_one::<String>("sortformat") {
pdf_files.sort_by(|a, b| {
let re = date_format_to_regex(sort_format);
let a_date = match re.find(a) {
Some(date_str) => {
NaiveDate::parse_from_str(date_str.as_str(), sort_format).unwrap()
}
None => NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(),
};
let b_date = match re.find(b) {
Some(date_str) => {
NaiveDate::parse_from_str(date_str.as_str(), sort_format).unwrap()
}
None => NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(),
};
a_date.cmp(&b_date)
})
}

// Parse all the statement files.
let mut members = Vec::new();
for file in pdf_files {
members.extend(parse(file, _password.clone()).context("Failed to parse statement")?)
members.extend(
parse(file, _password.unwrap_or(&"".to_string()).to_string())
.context("Failed to parse statement")?,
)
}

// Create a csv file and write the contents of the transaction list
Expand Down

0 comments on commit 2860110

Please sign in to comment.