From 03e2865ce66148bc3d2be8490f836846dd0099b8 Mon Sep 17 00:00:00 2001 From: Zach Kimberg Date: Thu, 27 Jul 2023 17:04:18 -0700 Subject: [PATCH] Fix AmazonReviews and fixed mkdocs version (#2725) * Fix AmazonReviews The amazon reviews has a dependency on another s3 bucket that was removed, breaking the CI. This fixes it by pulling from a local cache. * Try to fix mkdocs version --- .github/workflows/docs.yml | 2 +- .../basicdataset/amazon_reviews/metadata.json | 21 +++---------------- .../TrainAmazonReviewRanking.java | 2 +- ...fication_using_BERT_on_Amazon_Review.ipynb | 2 +- 4 files changed, 6 insertions(+), 21 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index e4bfcc76bf2..45e2177466d 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -27,7 +27,7 @@ jobs: - name: Install CN fonts run: sudo apt-get update && sudo apt-get install fonts-arphic-uming - name: install Python Dependencies - run: pip3 install nbconvert mkdocs mkdocs-exclude mknotebooks mkdocs-material jupyter Pygments Markdown + run: pip3 install nbconvert mkdocs==1.4.3 mkdocs-exclude mknotebooks mkdocs-material jupyter Pygments Markdown - name: Install IJava kernel run: | git clone https://github.com/frankfliu/IJava.git diff --git a/basicdataset/src/test/resources/mlrepo/dataset/nlp/ai/djl/basicdataset/amazon_reviews/metadata.json b/basicdataset/src/test/resources/mlrepo/dataset/nlp/ai/djl/basicdataset/amazon_reviews/metadata.json index 16f4fcd22e6..373b4fe9ed3 100644 --- a/basicdataset/src/test/resources/mlrepo/dataset/nlp/ai/djl/basicdataset/amazon_reviews/metadata.json +++ b/basicdataset/src/test/resources/mlrepo/dataset/nlp/ai/djl/basicdataset/amazon_reviews/metadata.json @@ -17,24 +17,9 @@ }, "files": { "amazon_reviews": { - "uri": "https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_Digital_Software_v1_00.tsv.gz", - "sha1Hash": "b8390100b92579ed814eede4112514417e339902", - "size": 18997559 - } - } - }, - { - "version": "1.0", - "snapshot": false, - "name": "amazon_reviews_us_Software", - "properties": { - "dataset": "us_Software" - }, - "files": { - "amazon_reviews": { - "uri": "https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_Software_v1_00.tsv.gz", - "sha1Hash": "e48346dd356698ce680e385e3ecf07501de695b8", - "size": 94010685 + "uri": "1.0/amazon_reviews_us_Digital_Software_v1_00.tsv.gz", + "sha1Hash": "098fb62c5731161dd1e10298a5d11636253609a1", + "size": 18997604 } } } diff --git a/examples/src/main/java/ai/djl/examples/training/transferlearning/TrainAmazonReviewRanking.java b/examples/src/main/java/ai/djl/examples/training/transferlearning/TrainAmazonReviewRanking.java index 2122b719a52..d87626a1cec 100644 --- a/examples/src/main/java/ai/djl/examples/training/transferlearning/TrainAmazonReviewRanking.java +++ b/examples/src/main/java/ai/djl/examples/training/transferlearning/TrainAmazonReviewRanking.java @@ -125,7 +125,7 @@ public static TrainingResult runExample(String[] args) private static CsvDataset getDataset( Arguments arguments, BertFullTokenizer tokenizer, int maxLength) { String amazonReview = - "https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_Digital_Software_v1_00.tsv.gz"; + "https://mlrepo.djl.ai/dataset/nlp/ai/djl/basicdataset/amazon_reviews/1.0/amazon_reviews_us_Digital_Software_v1_00.tsv.gz"; float paddingToken = tokenizer.getVocabulary().getIndex("[PAD]"); return CsvDataset.builder() .optCsvUrl(amazonReview) diff --git a/jupyter/rank_classification_using_BERT_on_Amazon_Review.ipynb b/jupyter/rank_classification_using_BERT_on_Amazon_Review.ipynb index 411fbbf0ae5..2edbc6c195f 100644 --- a/jupyter/rank_classification_using_BERT_on_Amazon_Review.ipynb +++ b/jupyter/rank_classification_using_BERT_on_Amazon_Review.ipynb @@ -159,7 +159,7 @@ "source": [ "CsvDataset getDataset(int batchSize, BertFullTokenizer tokenizer, int maxLength, int limit) {\n", " String amazonReview =\n", - " \"https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_Digital_Software_v1_00.tsv.gz\";\n", + " \"https://mlrepo.djl.ai/dataset/nlp/ai/djl/basicdataset/amazon_reviews/1.0/amazon_reviews_us_Digital_Software_v1_00.tsv.gz\";\n", " float paddingToken = tokenizer.getVocabulary().getIndex(\"[PAD]\");\n", " return CsvDataset.builder()\n", " .optCsvUrl(amazonReview) // load from Url\n",