diff --git a/README.md b/README.md
index ab42f02..5358a1d 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,8 @@ Full doc: [https://mind-network.gitbook.io/mind-lake-sdk](https://mind-network.g
* Initial Release
* v1.0.1
* Add IPFS support
+* v1.0.3
+  * Refactor connectors into classes
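+  * Add Web3Storage support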
## License
diff --git a/examples/use_case_arweave.ipynb b/examples/use_case_arweave.ipynb
index 461e028..c7c11ce 100644
--- a/examples/use_case_arweave.ipynb
+++ b/examples/use_case_arweave.ipynb
@@ -230,12 +230,12 @@
],
"source": [
"dataPack1 = DataPack(env.walletPrivateKey)\n",
- "result = dataPack1.loadFromCSVFileByDefineColumn(fn_local_csv, \n",
- " [\n",
+ "localFile1 = DataPack.LocalFileConnector(columns=[\n",
" DataPack.Column('uid', DataPack.DataType.int4, False),\n",
" DataPack.Column('wallet_address', DataPack.DataType.text, True),\n",
" DataPack.Column('register_date', DataPack.DataType.timestamp, True)\n",
" ])\n",
+ "result = dataPack1.loadFrom(localFile1, fn_local_csv)\n",
"assert result, result.message\n",
"df = pandas.DataFrame(dataPack1.data)\n",
"df"
@@ -272,7 +272,8 @@
"print(\"dropTable result:\", result.message)\n",
"\n",
"# upload from local csv into mindlake\n",
- "result = dataPack1.saveToMindLake('a_new_table', mindlake)\n",
+ "mindLakeConnector = DataPack.MindLakeConnector(mindlake)\n",
+ "result = dataPack1.saveTo(mindLakeConnector, 'a_new_table')\n",
"assert result, result.message\n",
"print(\"a_new_table has been saved to MindLake.\")"
]
@@ -362,8 +363,8 @@
],
"source": [
"dataPack2 = DataPack(env.walletPrivateKey)\n",
- "# make sure if you complete Step 1, so \"a_new_table\" exists in mindlake \n",
- "result = dataPack2.loadFromMindByQuery('select * from \"a_new_table\"', mindlake)\n",
+    "# make sure you have completed Step 1, so that \"a_new_table\" exists in mindlake\n",
+ "result = dataPack2.loadFrom(mindLakeConnector, 'select * from \"a_new_table\"')\n",
"assert result, result.message\n",
"df = pandas.DataFrame(dataPack2.data)\n",
"df"
@@ -388,24 +389,25 @@
"text": [
"test_table_encrypted.csv has been saved to Arweave.\n",
"\n",
- "The Arweave ID is: sMboxrf5U9Z-FM_-woSfiVerz0Lzpa1_OCmRX09Jgn0 \n",
+ "The Arweave ID is: DwyQeo8B7RdoJlqWNGm1WHiKVh-ua5BmclKu9pdX29w \n",
"\n",
- "You can check on Arweave: https://viewblock.io/arweave/tx/sMboxrf5U9Z-FM_-woSfiVerz0Lzpa1_OCmRX09Jgn0\n",
+ "You can check on Arweave: https://viewblock.io/arweave/tx/DwyQeo8B7RdoJlqWNGm1WHiKVh-ua5BmclKu9pdX29w\n",
"\n",
- "And the content on Arweave: https://arseed.web3infra.dev/sMboxrf5U9Z-FM_-woSfiVerz0Lzpa1_OCmRX09Jgn0\n",
+ "And the content on Arweave: https://arseed.web3infra.dev/DwyQeo8B7RdoJlqWNGm1WHiKVh-ua5BmclKu9pdX29w\n",
"\n",
"The content is:\n",
"uid,wallet_address,register_date\n",
- "123,\\x9326bce223092712067ca9dd8d6600fd6d0a8e572261927932f0b222092249804635af229e5bfd2e8ed550fb44539c37fa5f4bc77e48499ce01ed9bec037ea7ddd,\\x992afbd2992fcc46071972b3069b44476bc2316cacbe12a5305839d60ab43dc674\n",
- "124,\\x7726950c54dfd2470b9ef8929ee3c049af498bf99a56756f1824f0146fcda22df0bb392959e42a2c8639604ec301425e31844b8a7b847ae486abd29d6594c0ff7c,\\xcdf8811927b22d47aa10e2e8f367e986461d43242b9f81ff62564424d64f958235\n",
- "125,\\x72ccfbbd92144aed8bd0763e30f63e68ac99f095a5fdcc3772ea4b35ead34ede8fc891e7b8e73ae5d07e25fc3b801e66bbea0a110e9047b3bccfdddeac8f7cbd51,\\xb8138ce9e9feaff5c610f424cc97dfb44d1809ad31ca830fd15039d9d3124679ac\n",
+ "123,\\x645039a18f2d0ecfda0049619267f6e4f658d3c8dea202cf2953eaee63894b0b3af0c159a58036cd81d29d4f07d63f75236243d1bbdfd447a90a68e901a62f4153,\\xc9bd351c80b4784cdb1e26b10a8064946eb62b1d53b2b1f42da3092bf6e2cd054f\n",
+ "124,\\x6e6190fd99e05a04ea63768f435b8913a350876a758869804ae6a8ca824091af8b89b821cb2f9f1a7b4bdf51e90e94d77e41dac7637bf1babcfa7f1ba94f103ff5,\\x69239ddf275ef971a8bd7051988b1f92dd784377df7b4989719322aab31eeb3cb6\n",
+ "125,\\x54c1c6f1bc35bba9a1903e7774ca3ba6646b2aaf0f4a4b92262f8932b92bfb8d3a1699ebbc6b391e3b19983aa7832be12d75d5bdfe128c094e61b7f0a1dbab2e2c,\\x045d590d0ef821fe691ee42e04d066338392adcaf50627e3335250e9886d1a7c02\n",
"\n"
]
}
],
"source": [
- "result = dataPack2.saveToArweave('test_table_encrypted.csv', 'ACNH')\n",
- "# result = dataPack2.saveToArweave('test_table_encrypted.csv', 'AR', './arweave_wallet.json')\n",
+ "arweave1 = DataPack.ArweaveConnector('ACNH', ethWalletPrivateKey=env.walletPrivateKey)\n",
+ "# arweave = DataPack.ArweaveConnector('AR', arweaveWalletFile='./arweave_wallet.json')\n",
+ "result = dataPack2.saveTo(arweave1, 'test_table_encrypted.csv')\n",
"assert result, result.message\n",
"print(\"test_table_encrypted.csv has been saved to Arweave.\\n\")\n",
"itemID = result.data\n",
@@ -504,7 +506,8 @@
],
"source": [
"dataPack3 = DataPack(env.walletPrivateKey)\n",
- "result = dataPack3.loadFromArweave(itemID)\n",
+ "arweave2 = DataPack.ArweaveConnector()\n",
+ "result = dataPack3.loadFrom(arweave2, itemID)\n",
"assert result, result.message\n",
"print(\"You can see file on Arweave are encrypted, but datapack can decrypted it locally\")\n",
"df = pandas.DataFrame(dataPack3.data)\n",
@@ -561,20 +564,20 @@
"
\n",
" 0 | \n",
" 123 | \n",
- " \\x9c851ae97fd6eeb3ae2437c274274b3d773fcdee11f9... | \n",
- " \\x44e6bdad517e634a985702320c26770a9275c79aa9b3... | \n",
+ " \\x3c364d7aed8d63b68701fd00feac41435ff456eb307d... | \n",
+ " \\x35bafd055143484471c52153385b7ee5eedc58b09021... | \n",
"
\n",
" \n",
" 1 | \n",
" 124 | \n",
- " \\xaddba1e7648b04018dabaceb918cebb436dff5e57e61... | \n",
- " \\xb5dac593d9436309272b5b56c16d4db24bab7d9e130a... | \n",
+ " \\xbe1d0f00778d9e15e08672f9494a22c11ef870758572... | \n",
+ " \\x25ceb7256dd9ab8ef563ae220c62df39cf13bf3a7d08... | \n",
"
\n",
" \n",
" 2 | \n",
" 125 | \n",
- " \\x9afc9f24ec67f79105a340e939be6f1808f5a34495f2... | \n",
- " \\x755479852b673a2183e1168734e78378fcf81413c3f2... | \n",
+ " \\xa5805d615740ead33874d7036246cc2aee7665901d3c... | \n",
+ " \\xb541751a94f6b5128af3a5939867531ed5736b0fd7c4... | \n",
"
\n",
" \n",
"\n",
@@ -582,14 +585,14 @@
],
"text/plain": [
" uid wallet_address \\\n",
- "0 123 \\x9c851ae97fd6eeb3ae2437c274274b3d773fcdee11f9... \n",
- "1 124 \\xaddba1e7648b04018dabaceb918cebb436dff5e57e61... \n",
- "2 125 \\x9afc9f24ec67f79105a340e939be6f1808f5a34495f2... \n",
+ "0 123 \\x3c364d7aed8d63b68701fd00feac41435ff456eb307d... \n",
+ "1 124 \\xbe1d0f00778d9e15e08672f9494a22c11ef870758572... \n",
+ "2 125 \\xa5805d615740ead33874d7036246cc2aee7665901d3c... \n",
"\n",
" register_date \n",
- "0 \\x44e6bdad517e634a985702320c26770a9275c79aa9b3... \n",
- "1 \\xb5dac593d9436309272b5b56c16d4db24bab7d9e130a... \n",
- "2 \\x755479852b673a2183e1168734e78378fcf81413c3f2... "
+ "0 \\x35bafd055143484471c52153385b7ee5eedc58b09021... \n",
+ "1 \\x25ceb7256dd9ab8ef563ae220c62df39cf13bf3a7d08... \n",
+ "2 \\xb541751a94f6b5128af3a5939867531ed5736b0fd7c4... "
]
},
"execution_count": 10,
@@ -598,7 +601,8 @@
}
],
"source": [
- "result = dataPack3.saveToLocalFile('test_localtable_encrypted.csv', False)\n",
+ "localFile2 = DataPack.LocalFileConnector()\n",
+ "result = dataPack3.saveTo(localFile2, 'test_localtable_encrypted.csv')\n",
"assert result, result.message\n",
"print(\"test_localtable_encrypted.csv has been saved to local file.\")\n",
"df = pandas.read_csv('test_localtable_encrypted.csv')\n",
diff --git a/examples/use_case_ipfs.ipynb b/examples/use_case_ipfs.ipynb
index c427c26..30aacbe 100644
--- a/examples/use_case_ipfs.ipynb
+++ b/examples/use_case_ipfs.ipynb
@@ -228,12 +228,12 @@
],
"source": [
"dataPack1 = DataPack(env.walletPrivateKey)\n",
- "result = dataPack1.loadFromCSVFileByDefineColumn(fn_local_csv, \n",
- " [\n",
+ "localFile1 = DataPack.LocalFileConnector(columns=[\n",
" DataPack.Column('uid', DataPack.DataType.int4, False),\n",
" DataPack.Column('wallet_address', DataPack.DataType.text, True),\n",
" DataPack.Column('register_date', DataPack.DataType.timestamp, True)\n",
" ])\n",
+ "result = dataPack1.loadFrom(localFile1, fn_local_csv)\n",
"assert result, result.message\n",
"df = pandas.DataFrame(dataPack1.data)\n",
"df"
@@ -270,7 +270,8 @@
"print(\"dropTable result:\", result.message)\n",
"\n",
"# upload from local csv into mindlake\n",
- "result = dataPack1.saveToMindLake('a_new_table', mindlake)\n",
+ "mindLakeConnector = DataPack.MindLakeConnector(mindlake)\n",
+ "result = dataPack1.saveTo(mindLakeConnector, 'a_new_table')\n",
"assert result, result.message\n",
"print(\"a_new_table has been saved to MindLake.\")"
]
@@ -360,8 +361,8 @@
],
"source": [
"dataPack2 = DataPack(env.walletPrivateKey)\n",
- "# make sure if you complete Step 1, so \"a_new_table\" exists in mindlake \n",
- "result = dataPack2.loadFromMindByQuery('select * from \"a_new_table\"', mindlake)\n",
+    "# make sure you have completed Step 1, so that \"a_new_table\" exists in mindlake\n",
+ "result = dataPack2.loadFrom(mindLakeConnector, 'select * from \"a_new_table\"')\n",
"assert result, result.message\n",
"df = pandas.DataFrame(dataPack2.data)\n",
"df"
@@ -386,24 +387,25 @@
"text": [
"test_table_encrypted.csv has been saved to IPFS.\n",
"\n",
- "The IPFS ID is: QmYxCi1BBhbch496SDVP58VesPuTnraPgu57S84b1XiteM \n",
+ "The IPFS ID is: QmU1nri3draq7SAHVAjSx91dEVCw1e1LGdv1NaiepGvTBm \n",
"\n",
- "You can check on local IPFS http service: http://127.0.0.1:8080/ipfs/QmYxCi1BBhbch496SDVP58VesPuTnraPgu57S84b1XiteM\n",
+ "You can check on local IPFS http service: http://127.0.0.1:8080/ipfs/QmU1nri3draq7SAHVAjSx91dEVCw1e1LGdv1NaiepGvTBm\n",
"\n",
- "And the content on IPFS: http://127.0.0.1:8080/ipfs/QmYxCi1BBhbch496SDVP58VesPuTnraPgu57S84b1XiteM/test_table_encrypted.csv\n",
+ "And the content on IPFS: http://127.0.0.1:8080/ipfs/QmU1nri3draq7SAHVAjSx91dEVCw1e1LGdv1NaiepGvTBm/test_table_encrypted.csv\n",
"\n",
"The content is:\n",
"uid,wallet_address,register_date\n",
- "123,\\x4810c8a24f2f22243d778860d9ee5180d5375d6cbd22561e568c23bce63f7516d1d2887dcb1993de0133e1d1e002c06b38b56b25d23acac51e24e740f1c6c5e163,\\xf9977c69c6a3d716e615b344f4f66953a4b3d764892a5960460aba34a6c7645943\n",
- "124,\\xa7fca7f580180de2e92df9ddf9608c2442dcfa947ba23e3be37bf4764f981670016df19ca4fb3e7295479de368c45122f2cca35d619a402ac28d437cfd0188233f,\\xf06543016d08a89717d34fea65b66b4b8878ae8f411598c3ce3cbde0795a58d2c7\n",
- "125,\\xd26a30809f578388dfc97a0262190094c086060a380e7b83a5f2e9bd9cd680a6e272c900921ca7c5824391e4f6bc6854e622f59c0d891152e861ff7ba62f542c40,\\xfa94a16e90f53456b9d053537fed04802514b15a3506055acbbd519e67de1d6e1c\n",
+ "123,\\x82148bcab7b5c8fc8f81ce8cb79b307d81bfde84e51fa87ae99d56b9985b8b9018b26eff1b96a38cf148ffe1b923b99be3908a7e6d27d57e78d769af6be82dc92d,\\x0776d01ca9ffa39ea95916108147db031025ce9e3ce7f6cba619041b9fe9c247ee\n",
+ "124,\\x43ad786aca3e01879ee6b7753032842658e6c24a302bf2986b70c247a36bf54724ec914002f930b13fa9de88b072502f887a930b655590090a21d0f6253ab9b5f4,\\xcf0cc270db2108338f9440a0fb810706e31a73b20284226d9bddc40649d66e43f7\n",
+ "125,\\xe49d212633890b86855d663d90ee71ed323b68f67a53fed837c88f037d5fdb4ef5456e596a88a10a6b3d9c347033e4c0c7e1f566017fed5c49d7546fbf76348c8d,\\xb8ec6581139a9c29ce58c85f094ec53e26a23047d6ea3c73edbdcf57500e9d5de7\n",
"\n"
]
}
],
"source": [
"# make sure you have started IPFS service on your local machine\n",
- "result = dataPack2.saveToIPFS('test_table_encrypted.csv')\n",
+ "ipfs1 = DataPack.IPFSConnector()\n",
+ "result = dataPack2.saveTo(ipfs1, 'test_table_encrypted.csv')\n",
"assert result, result.message\n",
"print(\"test_table_encrypted.csv has been saved to IPFS.\\n\")\n",
"ipfsHash = result.data\n",
@@ -502,7 +504,8 @@
],
"source": [
"dataPack3 = DataPack(env.walletPrivateKey)\n",
- "result = dataPack3.loadFromIPFS(ipfsHash)\n",
+ "ipfs2 = DataPack.IPFSConnector()\n",
+ "result = dataPack3.loadFrom(ipfs2, ipfsHash)\n",
"assert result, result.message\n",
"print(\"You can see file on IPFS are encrypted, but datapack can decrypted it locally\")\n",
"df = pandas.DataFrame(dataPack3.data)\n",
@@ -559,20 +562,20 @@
" \n",
" 0 | \n",
" 123 | \n",
- " \\x1295126db9fb5f12c77ca17a7b2230509f05ee225d15... | \n",
- " \\x3f7a44eb7cdd6c1ff9433b4b911a0a400af3aafdc6dd... | \n",
+ " \\x14bb21b7b3105cac85bc84383b3b0b84bf9d0503940d... | \n",
+ " \\x6aa4984ca3bd3682679308b2ef22cf6f12d42698fde0... | \n",
"
\n",
" \n",
" 1 | \n",
" 124 | \n",
- " \\x3c9791f3a72caf6f8f4ed8d699c9efd63eccb60961b4... | \n",
- " \\x83cd814f9a8f2a1e1684a62e64d87f488517526441b3... | \n",
+ " \\x3f87e2905e69ee36419ac126dee7eb23b1b7d3b5ba16... | \n",
+ " \\x9b8355a538a4b2f84824f55d75a098d984145de1371f... | \n",
"
\n",
" \n",
" 2 | \n",
" 125 | \n",
- " \\xe2137c5d87fbaa2fe39f2fea8731992ea7a194b90641... | \n",
- " \\x550d9b3fc3d1edd7866990620a91ac5164c9fe9e30e0... | \n",
+ " \\x7901ba445c75ceb0016914f307707c84a26b655fe7fd... | \n",
+ " \\x7e91fca841863ca2ae32b2b1098b5191fdba7e0e0e6a... | \n",
"
\n",
" \n",
"\n",
@@ -580,14 +583,14 @@
],
"text/plain": [
" uid wallet_address \\\n",
- "0 123 \\x1295126db9fb5f12c77ca17a7b2230509f05ee225d15... \n",
- "1 124 \\x3c9791f3a72caf6f8f4ed8d699c9efd63eccb60961b4... \n",
- "2 125 \\xe2137c5d87fbaa2fe39f2fea8731992ea7a194b90641... \n",
+ "0 123 \\x14bb21b7b3105cac85bc84383b3b0b84bf9d0503940d... \n",
+ "1 124 \\x3f87e2905e69ee36419ac126dee7eb23b1b7d3b5ba16... \n",
+ "2 125 \\x7901ba445c75ceb0016914f307707c84a26b655fe7fd... \n",
"\n",
" register_date \n",
- "0 \\x3f7a44eb7cdd6c1ff9433b4b911a0a400af3aafdc6dd... \n",
- "1 \\x83cd814f9a8f2a1e1684a62e64d87f488517526441b3... \n",
- "2 \\x550d9b3fc3d1edd7866990620a91ac5164c9fe9e30e0... "
+ "0 \\x6aa4984ca3bd3682679308b2ef22cf6f12d42698fde0... \n",
+ "1 \\x9b8355a538a4b2f84824f55d75a098d984145de1371f... \n",
+ "2 \\x7e91fca841863ca2ae32b2b1098b5191fdba7e0e0e6a... "
]
},
"execution_count": 10,
@@ -596,7 +599,8 @@
}
],
"source": [
- "result = dataPack3.saveToLocalFile('test_localtable_encrypted.csv', False)\n",
+ "localFile2 = DataPack.LocalFileConnector()\n",
+ "result = dataPack3.saveTo(localFile2, 'test_localtable_encrypted.csv')\n",
"assert result, result.message\n",
"print(\"test_localtable_encrypted.csv has been saved to local file.\")\n",
"df = pandas.read_csv('test_localtable_encrypted.csv')\n",
diff --git a/examples/use_case_web3storage.ipynb b/examples/use_case_web3storage.ipynb
new file mode 100644
index 0000000..236cf58
--- /dev/null
+++ b/examples/use_case_web3storage.ipynb
@@ -0,0 +1,629 @@
+{
+ "cells": [
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "## Step 1. Configuration"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Input your walletPrivateKey here:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "check env.walletPrivateKey: 2a776\n",
+ "check env.mindLakeAppKey: Kyunq\n",
+ "check env.MINDLAKE_GATEWAY: https://sdk.mindnetwork.xyz/node\n",
+ "check env.web3StorageToken: eyJhb\n"
+ ]
+ }
+ ],
+ "source": [
+    "# make sure you have configured mindlakesdk; check env.py in the same folder\n",
+    "# please familiarize yourself with these settings; see https://github.com/mind-network/mind-lake-sdk-python\n",
+ "# You can get web3StorageToken from https://web3.storage/\n",
+ "import env\n",
+ "\n",
+ "print(\"check env.walletPrivateKey:\", env.walletPrivateKey[0:5])\n",
+ "print(\"check env.mindLakeAppKey:\", env.mindLakeAppKey[0:5])\n",
+ "print(\"check env.MINDLAKE_GATEWAY:\", env.MINDLAKE_GATEWAY)\n",
+ "print(\"check env.web3StorageToken:\", env.web3StorageToken[0:5])"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "Install dependencies and source code"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# please \"pip install mindlakesdk\" if not installed\n",
+ "import mindlakesdk\n",
+ "\n",
+ "# please \"pip install minddatapack\" if not installed, or git clone the source code\n",
+ "from minddatapack import DataPack\n",
+ "\n",
+ "# please \"pip install pandas\" if not installed, it is used to display structure content in the notebook\n",
+ "import pandas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# check that the example_data.csv file exists in the same folder; this demo will load it next."
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 2. Local CSV -> MindLake\n",
+ "- Load data from a local CSV file without the metadata file, by defining each column manually.\n",
+    "- You can also load from a local backup into MindLake once you are familiar; then there is no need to define each column."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>uid</th>\n",
+       "      <th>wallet_address</th>\n",
+       "      <th>register_date</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>123</td>\n",
+       "      <td>0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7</td>\n",
+       "      <td>2023-07-15 02:25:32.392441</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>124</td>\n",
+       "      <td>0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7</td>\n",
+       "      <td>2023-07-15 02:25:32.392441</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>125</td>\n",
+       "      <td>0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7</td>\n",
+       "      <td>2023-07-15 02:25:32.392441</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+ "text/plain": [
+ " uid wallet_address register_date\n",
+ "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "fn_local_csv = './example_data.csv'\n",
+ "\n",
+ "# preview local csv\n",
+ "df = pandas.read_csv(fn_local_csv)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>123</td>\n",
+       "      <td>0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7</td>\n",
+       "      <td>2023-07-15 02:25:32.392441</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>124</td>\n",
+       "      <td>0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7</td>\n",
+       "      <td>2023-07-15 02:25:32.392441</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>125</td>\n",
+       "      <td>0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7</td>\n",
+       "      <td>2023-07-15 02:25:32.392441</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+ ],
+ "text/plain": [
+ " 0 1 2\n",
+ "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataPack1 = DataPack(env.walletPrivateKey)\n",
+ "localFile1 = DataPack.LocalFileConnector(columns=[\n",
+ " DataPack.Column('uid', DataPack.DataType.int4, False),\n",
+ " DataPack.Column('wallet_address', DataPack.DataType.text, True),\n",
+ " DataPack.Column('register_date', DataPack.DataType.timestamp, True)\n",
+ " ])\n",
+ "result = dataPack1.loadFrom(localFile1, fn_local_csv)\n",
+ "assert result, result.message\n",
+ "df = pandas.DataFrame(dataPack1.data)\n",
+ "df"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Save the data into a table in MindLake"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "dropTable result: Success\n",
+ "a_new_table has been saved to MindLake.\n"
+ ]
+ }
+ ],
+ "source": [
+ "mindlake = mindlakesdk.connect(env.walletPrivateKey, env.mindLakeAppKey, env.MINDLAKE_GATEWAY)\n",
+ "assert mindlake, mindlake.message\n",
+ "\n",
+ "# drop the table if exists\n",
+ "result = mindlake.datalake.dropTable('a_new_table')\n",
+ "print(\"dropTable result:\", result.message)\n",
+ "\n",
+ "# upload from local csv into mindlake\n",
+ "mindLakeConnector = DataPack.MindLakeConnector(mindlake)\n",
+ "result = dataPack1.saveTo(mindLakeConnector, 'a_new_table')\n",
+ "assert result, result.message\n",
+ "print(\"a_new_table has been saved to MindLake.\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "you can also check this newly uploaded table at https://scan.mindnetwork.xyz/account/myData"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 3. MindLake -> Web3Storage\n",
+ "- Load data from a table in MindLake"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>123</td>\n",
+       "      <td>0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7</td>\n",
+       "      <td>2023-07-15 02:25:32.392441</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>124</td>\n",
+       "      <td>0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7</td>\n",
+       "      <td>2023-07-15 02:25:32.392441</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>125</td>\n",
+       "      <td>0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7</td>\n",
+       "      <td>2023-07-15 02:25:32.392441</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+ ],
+ "text/plain": [
+ " 0 1 2\n",
+ "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataPack2 = DataPack(env.walletPrivateKey)\n",
+    "# make sure you have completed Step 1, so that \"a_new_table\" exists in mindlake\n",
+ "result = dataPack2.loadFrom(mindLakeConnector, 'select * from \"a_new_table\"')\n",
+ "assert result, result.message\n",
+ "df = pandas.DataFrame(dataPack2.data)\n",
+ "df"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Save the data into Web3Storage"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "test_table_encrypted.csv has been saved to Web3Storage.\n",
+ "\n",
+ "The IPFS CID is: bafkreigsgm7gmydxagcuo3w6jtnsm36kvdoiml3dtfjomy6ksioqe6d3n4 \n",
+ "\n",
+ "You can download the archive file from: https://w3s.link/ipfs/bafkreigsgm7gmydxagcuo3w6jtnsm36kvdoiml3dtfjomy6ksioqe6d3n4\n",
+ "\n",
+       "Here is the storage info of the archive file:\n",
+ "{\"_id\":\"315318962443259193\",\"type\":\"Upload\",\"name\":\"Upload at 2023-09-03T09:09:01.164Z\",\"created\":\"2023-09-03T09:09:01.164+00:00\",\"updated\":\"2023-09-03T09:09:01.164+00:00\",\"cid\":\"bafkreigsgm7gmydxagcuo3w6jtnsm36kvdoiml3dtfjomy6ksioqe6d3n4\",\"dagSize\":1626,\"pins\":[{\"status\":\"Pinned\",\"updated\":\"2023-09-03T09:09:01.164+00:00\",\"peerId\":\"bafzbeibhqavlasjc7dvbiopygwncnrtvjd2xmryk5laib7zyjor6kf3avm\",\"peerName\":\"elastic-ipfs\",\"region\":null}],\"deals\":[]}\n"
+ ]
+ }
+ ],
+ "source": [
+ "web3Storage1 = DataPack.Web3StorageConnector(env.web3StorageToken)\n",
+ "result = dataPack2.saveTo(web3Storage1, 'test_table_encrypted.csv')\n",
+ "assert result, result.message\n",
+ "print(\"test_table_encrypted.csv has been saved to Web3Storage.\\n\")\n",
+ "ipfsCid = result.data\n",
+ "print(\"The IPFS CID is:\", ipfsCid, '\\n')\n",
+ "ipfs_file_url = \"https://w3s.link/ipfs/%s\"%ipfsCid\n",
+ "ipfs_file_info_url = \"https://api.web3.storage/user/uploads/%s\"%ipfsCid\n",
+ "print(f\"You can download the archive file from: {ipfs_file_url}\\n\")\n",
+    "print(\"Here is the storage info of the archive file:\")\n",
+ "import requests\n",
+ "headers = {\n",
+ " \"Authorization\": \"Bearer \" + env.web3StorageToken\n",
+ "}\n",
+ "content = requests.get(ipfs_file_info_url, headers=headers).text\n",
+ "print(content)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 4. Web3Storage -> Local File\n",
+ "- Load data from Web3Storage and save to local file"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+       "You can see the file on Web3Storage is encrypted, but DataPack can decrypt it locally\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>123</td>\n",
+       "      <td>0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7</td>\n",
+       "      <td>2023-07-15 02:25:32.392441</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>124</td>\n",
+       "      <td>0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7</td>\n",
+       "      <td>2023-07-15 02:25:32.392441</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>125</td>\n",
+       "      <td>0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7</td>\n",
+       "      <td>2023-07-15 02:25:32.392441</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+ ],
+ "text/plain": [
+ " 0 1 2\n",
+ "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataPack3 = DataPack(env.walletPrivateKey)\n",
+ "web3Storage2 = DataPack.Web3StorageConnector()\n",
+ "result = dataPack3.loadFrom(web3Storage2, ipfsCid)\n",
+ "assert result, result.message\n",
+    "print(\"You can see the file on Web3Storage is encrypted, but DataPack can decrypt it locally\")\n",
+ "df = pandas.DataFrame(dataPack3.data)\n",
+ "df"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Save the data into a local file with encryption"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "test_localtable_encrypted.csv has been saved to local file.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>uid</th>\n",
+       "      <th>wallet_address</th>\n",
+       "      <th>register_date</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>123</td>\n",
+       "      <td>\\xec58be677e5c22bac7ea266df234565fe56506aa0d5e...</td>\n",
+       "      <td>\\x1ef6e317b5cbfe4568fc648dad1bb9e7e8e872005071...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>124</td>\n",
+       "      <td>\\xa55d391505fbb845d77990d3c415dbed2d758b7d3977...</td>\n",
+       "      <td>\\xd460941b3d1cbb9bbd1014a02712b18a918b869af046...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>125</td>\n",
+       "      <td>\\xcc8e1cffefa3029ecb20518c098254aa421d8904f92a...</td>\n",
+       "      <td>\\xe8d4aac12f92c75cd1589b028a7ae5f3e9c47c1259f6...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+ ],
+ "text/plain": [
+ " uid wallet_address \\\n",
+ "0 123 \\xec58be677e5c22bac7ea266df234565fe56506aa0d5e... \n",
+ "1 124 \\xa55d391505fbb845d77990d3c415dbed2d758b7d3977... \n",
+ "2 125 \\xcc8e1cffefa3029ecb20518c098254aa421d8904f92a... \n",
+ "\n",
+ " register_date \n",
+ "0 \\x1ef6e317b5cbfe4568fc648dad1bb9e7e8e872005071... \n",
+ "1 \\xd460941b3d1cbb9bbd1014a02712b18a918b869af046... \n",
+ "2 \\xe8d4aac12f92c75cd1589b028a7ae5f3e9c47c1259f6... "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "localFile2 = DataPack.LocalFileConnector()\n",
+ "result = dataPack3.saveTo(localFile2, 'test_localtable_encrypted.csv')\n",
+ "assert result, result.message\n",
+ "print(\"test_localtable_encrypted.csv has been saved to local file.\")\n",
+ "df = pandas.read_csv('test_localtable_encrypted.csv')\n",
+ "df"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "datapack-dev",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.17"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/minddatapack/__init__.py b/minddatapack/__init__.py
index 7d3414a..8de1fa3 100644
--- a/minddatapack/__init__.py
+++ b/minddatapack/__init__.py
@@ -4,15 +4,21 @@
from mindlakesdk.utils import ResultType, DataType
from web3 import Web3
import importlib.metadata
-import minddatapack.arweaveconnector
+from minddatapack.arweaveconnector import ArweaveConnector
+from minddatapack.ipfsconnector import IPFSConnector
+from minddatapack.web3storageconnector import Web3StorageConnector
import minddatapack.mindlakeconnector
import minddatapack.localfileconnector
-import minddatapack.ipfsconnector
-from minddatapack.utils import Column
+from minddatapack.utils import Column, Connector
class DataPack:
DataType = DataType
Column = Column
+ IPFSConnector = IPFSConnector
+ Web3StorageConnector = Web3StorageConnector
+ ArweaveConnector = ArweaveConnector
+ LocalFileConnector = minddatapack.localfileconnector.LocalFileConnector
+ MindLakeConnector = minddatapack.mindlakeconnector.MindLakeConnector
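+    # Typical usage (sketch, mirroring the example notebooks):
+    #   pack = DataPack(walletPrivateKey)
+    #   pack.loadFrom(DataPack.LocalFileConnector(columns=[...]), 'data.csv')
+    #   pack.saveTo(DataPack.MindLakeConnector(mindlake), 'a_new_table')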
def __init__(self, walletPrivateKey: str):
self.existData = False
@@ -20,38 +26,35 @@ def __init__(self, walletPrivateKey: str):
self.data = None
self.columnName = None
self.fileName = None
- self.primaryKey = None
- self.version = importlib.metadata.version('minddatapack')
- self.__walletPrivateKey = walletPrivateKey
+ # self.primaryKey = None
+ try:
+ self.version = importlib.metadata.version('minddatapack')
+        except importlib.metadata.PackageNotFoundError:
+ self.version = '0.0.0'
web3 = Web3(Web3.HTTPProvider(mindlakesdk.settings.WEB3API))
self.__walletAccount = web3.eth.account.from_key(walletPrivateKey)
- def loadFromMindByQuery(self, sqlStatement: str, mindLake: mindlakesdk.MindLake) -> ResultType:
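+    # Generic entry points: the connector object decides which backend
+    # (local file, MindLake, Arweave, IPFS, Web3Storage) is read or written.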
+ def saveTo(self, connector: Connector, name: str) -> ResultType:
+ return connector.save(self, name)
+
+ def loadFrom(self, connector: Connector, identifier: str) -> ResultType:
+ return connector.load(self, identifier)
+
+ def _loadFromMindByQuery(self, sqlStatement: str, mindLake: mindlakesdk.MindLake) -> ResultType:
result, self.__columns = minddatapack.mindlakeconnector.loadFromMindByQuery(self, sqlStatement, mindLake)
return result
- def saveToMindLake(self, tableName: str, mindLake: mindlakesdk.MindLake) -> ResultType:
+ def _saveToMindLake(self, tableName: str, mindLake: mindlakesdk.MindLake) -> ResultType:
return minddatapack.mindlakeconnector.saveToMindLake(tableName, mindLake, self.data, self.__columns)
- def saveToLocalFile(self, filePath: str, ignoreEncrypt: bool = False):
+ def _saveToLocalFile(self, filePath: str, ignoreEncrypt: bool = False):
return minddatapack.localfileconnector.saveToLocalFile(self, filePath, ignoreEncrypt, self.__columns, self.__walletAccount)
- def loadFromLocalFile(self, filePath: str):
+ def _loadFromLocalFile(self, filePath: str):
result, self.__columns = minddatapack.localfileconnector.loadFromLocalFile(self, filePath, self.__walletAccount)
return result
- def loadFromCSVFileByDefineColumn(self, csvFilePath: str, columns: list):
+ def _loadFromCSVFileByDefineColumn(self, csvFilePath: str, columns: list):
self.__columns = columns
return minddatapack.localfileconnector.loadFromCSVFileByDefineColumn(self, csvFilePath, columns)
-
- def saveToArweave(self, fileName: str, tokenName: str, arWalletFile: str = None):
- return minddatapack.arweaveconnector.saveToArweave(self, fileName, tokenName, arWalletFile, self.__walletPrivateKey)
-
- def loadFromArweave(self, id: str, arGateway: str = 'https://arseed.web3infra.dev/'):
- return minddatapack.arweaveconnector.loadFromArweave(self, id, arGateway)
-
- def saveToIPFS(self, fileName: str, apiEndpoint: str = 'http://localhost:5001', apiKey: str = None, apiSecret: str = None):
- return minddatapack.ipfsconnector.saveToIPFS(self, fileName, apiEndpoint, apiKey, apiSecret)
-
- def loadFromIPFS(self, ipfsCID: str, apiEndpoint: str = 'http://localhost:5001', apiKey: str = None, apiSecret: str = None):
- return minddatapack.ipfsconnector.loadFromIPFS(self, ipfsCID, apiEndpoint, apiKey, apiSecret)
+
\ No newline at end of file
diff --git a/minddatapack/arweaveconnector.py b/minddatapack/arweaveconnector.py
index 8012f4d..30361f3 100644
--- a/minddatapack/arweaveconnector.py
+++ b/minddatapack/arweaveconnector.py
@@ -6,64 +6,76 @@
from mindlakesdk.utils import ResultType
import minddatapack.utils
-def saveToArweave(dataPack, fileName: str, tokenName: str, arWalletFile: str, ethWalletPrivateKey: str):
- result = dataPack.saveToLocalFile(fileName)
- if not result:
- return result
- dataFileName = fileName
- metaFileName = fileName + minddatapack.utils.METADATA_EXT
- try:
- import arseeding, everpay
- if arWalletFile:
- signer = everpay.ARSigner(arWalletFile)
+class ArweaveConnector(minddatapack.utils.Connector):
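+    # tokenName is the currency used to pay for Arweave storage (e.g. 'AR' or 'ACNH');
+    # sign with either an Arweave wallet file or an Ethereum private key.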
+ def __init__(self, tokenName: str = None, arWalletFile: str = None, ethWalletPrivateKey: str = None, arGateway: str = None):
+ self.tokenName = tokenName
+ self.arWalletFile = arWalletFile
+ self.ethWalletPrivateKey = ethWalletPrivateKey
+ if arGateway:
+ if arGateway[-1] != '/':
+ arGateway += '/'
+ self.arGateway = arGateway
else:
- signer = everpay.ETHSigner(ethWalletPrivateKey)
- with open(dataFileName, 'rb') as dataFile:
- with open(metaFileName, 'r') as metaFile:
- data = dataFile.read()
- metadata = metaFile.read()
- order = arseeding.send_and_pay(signer, tokenName, data, tags= [
- {'name': 'App-Name', 'value': 'Mind-DataPack'},
- {'name': 'MindDataPackMetaData', 'value': metadata},
- {'name': 'Content-Type', 'value': 'text/csv'}])
- return ResultType(0, "Success", order['itemId'])
- except Exception as e:
- logging.debug("Exception:", str(e))
- return ResultType(60014, "Fail to connect to Arweave", None)
- finally:
- if os.path.exists(dataFileName):
- os.remove(dataFileName)
- if os.path.exists(metaFileName):
- os.remove(metaFileName)
+ self.arGateway = 'https://arseed.web3infra.dev/'
-def loadFromArweave(dataPack, id: str, arGateway: str):
- cacheDataFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.csv'
- cacheMetaFileName = cacheDataFileName + minddatapack.utils.METADATA_EXT
- try:
- if arGateway[-1] != '/':
- arGateway += '/'
- metaUrl = arGateway + 'bundle/tx/' + id
- metaResponse = requests.get(metaUrl)
- if metaResponse and metaResponse.status_code == 200:
- txMeta = json.loads(metaResponse.text)
- metadataJsonStr = txMeta['tags'][1]['value']
- with open(cacheMetaFileName, 'wb') as file:
- file.write(metadataJsonStr.encode('utf-8'))
-
- dataUrl = arGateway + id
- dataResponse = requests.get(dataUrl)
- if dataResponse and dataResponse.status_code == 200:
- with open(cacheDataFileName, 'wb') as file:
- file.write(dataResponse.content)
+ def save(self, dataPack: "minddatapack.DataPack", fileName: str) -> ResultType:
+ result = dataPack._saveToLocalFile(fileName)
+ if not result:
+ return result
+ dataFileName = fileName
+ metaFileName = fileName + minddatapack.utils.METADATA_EXT
+ try:
+ import arseeding, everpay
+ if self.arWalletFile:
+ signer = everpay.ARSigner(self.arWalletFile)
+ else:
+ signer = everpay.ETHSigner(self.ethWalletPrivateKey)
+ with open(dataFileName, 'rb') as dataFile:
+ with open(metaFileName, 'r') as metaFile:
+ data = dataFile.read()
+ metadata = metaFile.read()
+ order = arseeding.send_and_pay(signer, self.tokenName, data, tags= [
+ {'name': 'App-Name', 'value': 'Mind-DataPack'},
+ {'name': 'MindDataPackMetaData', 'value': metadata},
+ {'name': 'Content-Type', 'value': 'text/csv'}])
+ return ResultType(0, "Success", order['itemId'])
+ except Exception as e:
+ logging.debug("Exception:", str(e))
+ return ResultType(60014, "Fail to connect to Arweave", None)
+ finally:
+ if os.path.exists(dataFileName):
+ os.remove(dataFileName)
+ if os.path.exists(metaFileName):
+ os.remove(metaFileName)
- return dataPack.loadFromLocalFile(cacheDataFileName)
- else:
- return ResultType(60001, "Network error", None)
- except Exception as e:
- logging.debug("Exception:", str(e))
- return ResultType(60014, "Fail to connect to Arweave", None)
- finally:
- if os.path.exists(cacheDataFileName):
- os.remove(cacheDataFileName)
- if os.path.exists(cacheMetaFileName):
- os.remove(cacheMetaFileName)
+ def load(self, dataPack: "minddatapack.DataPack", transactionID: str) -> ResultType:
+ cacheDataFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.csv'
+ cacheMetaFileName = cacheDataFileName + minddatapack.utils.METADATA_EXT
+ try:
+ metaUrl = self.arGateway + 'bundle/tx/' + transactionID
+ metaResponse = requests.get(metaUrl)
+ if metaResponse and metaResponse.status_code == 200:
+ txMeta = json.loads(metaResponse.text)
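+                # scan the transaction tags for the DataPack metadata entry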
+ for tag in txMeta['tags']:
+ if tag['name'] == 'MindDataPackMetaData':
+ metadataJsonStr = tag['value']
+ with open(cacheMetaFileName, 'wb') as file:
+ file.write(metadataJsonStr.encode('utf-8'))
+
+ dataUrl = self.arGateway + transactionID
+ dataResponse = requests.get(dataUrl)
+ if dataResponse and dataResponse.status_code == 200:
+ with open(cacheDataFileName, 'wb') as file:
+ file.write(dataResponse.content)
+
+ return dataPack._loadFromLocalFile(cacheDataFileName)
+ else:
+ return ResultType(60001, "Network error", None)
+ except Exception as e:
+ logging.debug("Exception:", str(e))
+ return ResultType(60014, "Fail to connect to Arweave", None)
+ finally:
+ if os.path.exists(cacheDataFileName):
+ os.remove(cacheDataFileName)
+ if os.path.exists(cacheMetaFileName):
+ os.remove(cacheMetaFileName)
diff --git a/minddatapack/ipfsconnector.py b/minddatapack/ipfsconnector.py
index 69db82b..b89b86c 100644
--- a/minddatapack/ipfsconnector.py
+++ b/minddatapack/ipfsconnector.py
@@ -7,79 +7,86 @@
from mindlakesdk.utils import ResultType
import minddatapack.utils
+class IPFSConnector(minddatapack.utils.Connector):
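+    # defaults to the HTTP API of a local IPFS node; pass apiKey/apiSecret if the endpoint requires auth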
+ def __init__(self, apiEndpoint: str = None, apiKey: str = None, apiSecret: str = None):
+ self.apiEndpoint = apiEndpoint
+ if not self.apiEndpoint:
+ self.apiEndpoint = 'http://localhost:5001'
+ self.apiKey = apiKey
+ self.apiSecret = apiSecret
-def saveToIPFS(dataPack, fileName: str, apiEndpoint: str, apiKey: str, apiSecret: str) -> ResultType:
- result = dataPack.saveToLocalFile(fileName)
- if not result:
- return result
- metaFileName = fileName + minddatapack.utils.METADATA_EXT
- try:
- csvFile = open(fileName, 'rb')
- metaFile = open(metaFileName, 'rb')
- files = {}
- files[fileName] = csvFile
- files[metaFileName] = metaFile
- if apiKey and apiSecret:
- response = requests.post(apiEndpoint + '/api/v0/add?pin=true&wrap-with-directory=true', files=files, auth=(apiKey,apiSecret))
- else:
- response = requests.post(apiEndpoint + '/api/v0/add?pin=true&wrap-with-directory=true', files=files)
- if response and response.status_code == 200:
- folderJson = response.text.splitlines()[-1]
- ipfsHash = json.loads(folderJson)['Hash']
- return ResultType(0, "Success", ipfsHash)
- else:
- return ResultType(60001, "Network error", None)
- except Exception as e:
- logging.debug("Exception:", str(e))
- return ResultType(60014, "Fail to connect to IPFS", None)
- finally:
- if csvFile:
- csvFile.close()
- if metaFile:
- metaFile.close()
- if os.path.exists(fileName):
- os.remove(fileName)
- if os.path.exists(metaFileName):
- os.remove(metaFileName)
+ def save(self, dataPack: "minddatapack.DataPack", fileName: str) -> ResultType:
+ result = dataPack._saveToLocalFile(fileName)
+ if not result:
+ return result
+ metaFileName = fileName + minddatapack.utils.METADATA_EXT
+        csvFile = None
+        metaFile = None
+        try:
+            csvFile = open(fileName, 'rb')
+            metaFile = open(metaFileName, 'rb')
+ files = {}
+ files[fileName] = csvFile
+ files[metaFileName] = metaFile
+ if self.apiKey and self.apiSecret:
+ response = requests.post(self.apiEndpoint + '/api/v0/add?pin=true&wrap-with-directory=true', files=files, auth=(self.apiKey,self.apiSecret))
+ else:
+ response = requests.post(self.apiEndpoint + '/api/v0/add?pin=true&wrap-with-directory=true', files=files)
+ if response and response.status_code == 200:
+ folderJson = response.text.splitlines()[-1]
+ ipfsHash = json.loads(folderJson)['Hash']
+ return ResultType(0, "Success", ipfsHash)
+ else:
+ return ResultType(60001, "Network error", None)
+ except Exception as e:
+ logging.debug("Exception:", str(e))
+ return ResultType(60014, "Fail to connect to IPFS", None)
+ finally:
+ if csvFile:
+ csvFile.close()
+ if metaFile:
+ metaFile.close()
+ if os.path.exists(fileName):
+ os.remove(fileName)
+ if os.path.exists(metaFileName):
+ os.remove(metaFileName)
-def loadFromIPFS(dataPack, ipfsCID: str, apiEndpoint: str, apiKey: str, apiSecret: str):
- cacheTarFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.tar.gz'
- metaFileName = None
- dataFileName = None
- try:
- if apiKey and apiSecret:
- response = requests.post(apiEndpoint + f'/api/v0/get?arg={ipfsCID}&archive=true&compress=true&compression-level=6', auth=(apiKey,apiSecret))
- else:
- response = requests.post(apiEndpoint + f'/api/v0/get?arg={ipfsCID}&archive=true&compress=true&compression-level=6')
- if response and response.status_code == 200:
- with open(cacheTarFileName, 'wb') as file:
- file.write(response.content)
- with tarfile.open(cacheTarFileName, "r:gz") as tar:
- members = tar.getmembers()
- if len(members) != 3:
- return ResultType(60015, "Invalid DataPack data", None)
- for member in members:
- nameSplit = member.name.split('/')
- if len(nameSplit) == 2:
- if member.name.endswith(minddatapack.utils.METADATA_EXT):
- metaFileName = member.name
- elif member.name.endswith('.csv'):
- dataFileName = member.name
- tar.extract(member)
- if metaFileName != dataFileName + minddatapack.utils.METADATA_EXT:
- return ResultType(60015, "Invalid DataPack data", None)
- return dataPack.loadFromLocalFile(dataFileName)
- else:
- return ResultType(60001, "Network error", None)
- except Exception as e:
- logging.debug("Exception:", str(e))
- return ResultType(60014, "Fail to connect to IPFS", None)
- finally:
- if os.path.exists(cacheTarFileName):
- os.remove(cacheTarFileName)
- if dataFileName and os.path.exists(dataFileName):
- os.remove(dataFileName)
- if metaFileName and os.path.exists(metaFileName):
- os.remove(metaFileName)
- if os.path.exists(ipfsCID):
- os.rmdir(ipfsCID)
+ def load(self, dataPack: "minddatapack.DataPack", ipfsCID: str) -> ResultType:
+ cacheTarFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.tar.gz'
+ metaFileName = None
+ dataFileName = None
+ try:
+ if self.apiKey and self.apiSecret:
+ response = requests.post(self.apiEndpoint + f'/api/v0/get?arg={ipfsCID}&archive=true&compress=true&compression-level=6', auth=(self.apiKey,self.apiSecret))
+ else:
+ response = requests.post(self.apiEndpoint + f'/api/v0/get?arg={ipfsCID}&archive=true&compress=true&compression-level=6')
+ if response and response.status_code == 200:
+ with open(cacheTarFileName, 'wb') as file:
+ file.write(response.content)
+ with tarfile.open(cacheTarFileName, "r:gz") as tar:
+ members = tar.getmembers()
+ if len(members) != 3:
+ return ResultType(60015, "Invalid DataPack data", None)
+ for member in members:
+ nameSplit = member.name.split('/')
+ if len(nameSplit) == 2:
+ if member.name.endswith(minddatapack.utils.METADATA_EXT):
+ metaFileName = member.name
+ elif member.name.endswith('.csv'):
+ dataFileName = member.name
+ tar.extract(member)
+ if metaFileName != dataFileName + minddatapack.utils.METADATA_EXT:
+ return ResultType(60015, "Invalid DataPack data", None)
+ return dataPack._loadFromLocalFile(dataFileName)
+ else:
+ return ResultType(60001, "Network error", None)
+ except Exception as e:
+ logging.debug("Exception:", str(e))
+ return ResultType(60014, "Fail to connect to IPFS", None)
+ finally:
+ if os.path.exists(cacheTarFileName):
+ os.remove(cacheTarFileName)
+ if dataFileName and os.path.exists(dataFileName):
+ os.remove(dataFileName)
+ if metaFileName and os.path.exists(metaFileName):
+ os.remove(metaFileName)
+ if os.path.exists(ipfsCID):
+ os.rmdir(ipfsCID)
diff --git a/minddatapack/localfileconnector.py b/minddatapack/localfileconnector.py
index 1337df8..90fa568 100644
--- a/minddatapack/localfileconnector.py
+++ b/minddatapack/localfileconnector.py
@@ -13,6 +13,20 @@
from mindlakesdk.utils import ResultType, DataType
import minddatapack.utils
+class LocalFileConnector(minddatapack.utils.Connector):
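+    # columns is only needed when loading a plain CSV that has no .meta.json sidecar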
+ def __init__(self, ignoreEncrypt: bool = False, columns: list = None):
+ self.ignoreEncrypt = ignoreEncrypt
+ self.columns = columns
+
+ def save(self, dataPack: "minddatapack.DataPack", filePath: str) -> ResultType:
+ return dataPack._saveToLocalFile(filePath, self.ignoreEncrypt)
+
+ def load(self, dataPack: "minddatapack.DataPack", filePath: str) -> ResultType:
+ if self.columns:
+ return dataPack._loadFromCSVFileByDefineColumn(filePath, self.columns)
+ else:
+ return dataPack._loadFromLocalFile(filePath)
+
def __encrypt(data, columnMeta: minddatapack.utils.Column) -> ResultType:
try:
data = __encodeByDataType(data, columnMeta.type)
@@ -49,7 +63,7 @@ def __decrypt(cipher: str, columnMeta: minddatapack.utils.Column) -> ResultType:
return ResultType(60013, 'Decrypt data failed')
return ResultType(0, "Success", data)
-def saveToLocalFile(dataPack, filePath: str, ignoreEncrypt: bool, columns: list, walletAccount):
+def saveToLocalFile(dataPack: "minddatapack.DataPack", filePath: str, ignoreEncrypt: bool, columns: list, walletAccount):
if not dataPack.existData:
return ResultType(60006, 'No data in DataPack')
dataPack.fileName = os.path.basename(filePath)
@@ -102,13 +116,13 @@ def __buildMetadata(fileName: str, ignoreEncrypt: bool, fileHash: str, columns:
metadata['Column'].append(columnMeta)
return metadata
-def loadFromLocalFile(dataPack, filePath: str, walletAccount):
+def loadFromLocalFile(dataPack: "minddatapack.DataPack", filePath: str, walletAccount):
# the path of the meta file should be filePath + '.meta.json'
metaFilePath = filePath + minddatapack.utils.METADATA_EXT
if not os.path.exists(filePath):
- return ResultType(60007, 'CSV File not found')
+ return ResultType(60007, 'CSV File not found'), None
if not os.path.exists(metaFilePath):
- return ResultType(60008, 'Metadata file not found')
+ return ResultType(60008, 'Metadata file not found'), None
with open(metaFilePath, 'r') as file:
metadata = json.load(file)
dataPack.fileName = metadata['FileName']
@@ -133,7 +147,7 @@ def loadFromLocalFile(dataPack, filePath: str, walletAccount):
if not ignoreEncrypt and columns[index].encrypt:
decryptResult = __decrypt(cell, columns[index])
if not decryptResult:
- return decryptResult
+ return decryptResult, None
rowDecoded.append(decryptResult.data)
else:
if columns[index].type == DataType.int4 or columns[index].type == DataType.int8:
@@ -143,14 +157,19 @@ def loadFromLocalFile(dataPack, filePath: str, walletAccount):
elif columns[index].type == DataType.decimal:
rowDecoded.append(Decimal(cell))
elif columns[index].type == DataType.timestamp:
- rowDecoded.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S.%f'))
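+                        # accept timestamps with or without fractional seconds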
+                        if len(cell) == 19:
+                            rowDecoded.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S'))
+                        elif len(cell) == 26:
+                            rowDecoded.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S.%f'))
+                        else:
+                            return ResultType(60015, 'Invalid timestamp format'), None
else:
rowDecoded.append(cell)
dataPack.data.append(rowDecoded)
dataPack.existData = True
return ResultType(0, "Success"), columns
-def loadFromCSVFileByDefineColumn(dataPack, csvFilePath: str, columns: list):
+def loadFromCSVFileByDefineColumn(dataPack: "minddatapack.DataPack", csvFilePath: str, columns: list):
# the whole csv file should be in plain text
if not os.path.exists(csvFilePath):
return ResultType(60007, 'CSV File not found')
@@ -178,7 +197,12 @@ def loadFromCSVFileByDefineColumn(dataPack, csvFilePath: str, columns: list):
elif columns[index].type == DataType.decimal:
formattedRow.append(Decimal(cell))
elif columns[index].type == DataType.timestamp:
- formattedRow.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S.%f'))
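+                    # accept timestamps with or without fractional seconds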
+                    if len(cell) == 19:
+                        formattedRow.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S'))
+                    elif len(cell) == 26:
+                        formattedRow.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S.%f'))
+                    else:
+                        return ResultType(60015, 'Invalid timestamp format')
else:
formattedRow.append(cell)
dataPack.data.append(formattedRow)
diff --git a/minddatapack/mindlakeconnector.py b/minddatapack/mindlakeconnector.py
index 7d892b6..2e97320 100644
--- a/minddatapack/mindlakeconnector.py
+++ b/minddatapack/mindlakeconnector.py
@@ -1,7 +1,18 @@
+import datetime
import mindlakesdk
from mindlakesdk.utils import ResultType, DataType
import minddatapack.utils
+class MindLakeConnector(minddatapack.utils.Connector):
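+    # wraps a connected mindlakesdk.MindLake session: save() inserts into a table, load() runs a query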
+ def __init__(self, mindLake: mindlakesdk.MindLake):
+ self.mindLake = mindLake
+
+ def save(self, dataPack: "minddatapack.DataPack", tableName: str) -> ResultType:
+ return dataPack._saveToMindLake(tableName, self.mindLake)
+
+ def load(self, dataPack: "minddatapack.DataPack", sqlStatement: str) -> ResultType:
+ return dataPack._loadFromMindByQuery(sqlStatement, self.mindLake)
+
def loadFromMindByQuery(dataPack, sqlStatement: str, mindLake: mindlakesdk.MindLake) -> ResultType:
result = mindLake.datalake.queryForDataAndMeta(sqlStatement)
if not result:
@@ -31,6 +42,13 @@ def loadFromMindByQuery(dataPack, sqlStatement: str, mindLake: mindlakesdk.MindL
if not decryptResult:
return decryptResult
rowResult.append(decryptResult.data)
+ elif columns[index].type == DataType.timestamp:
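+                # accept timestamps with or without fractional seconds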
+                if len(cell) == 19:
+                    rowResult.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S'))
+                elif len(cell) == 26:
+                    rowResult.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S.%f'))
+                else:
+                    return ResultType(60015, 'Invalid timestamp format'), None
else:
rowResult.append(cell)
dataPack.data.append(rowResult)
@@ -51,8 +69,10 @@ def saveToMindLake(tableName: str, mindLake: mindlakesdk.MindLake, data, columns
return encryptResult
insert_value = insert_value + "'" + encryptResult.data + "',"
else:
- if column.type == DataType.text or column.type == DataType.timestamp:
- insert_value = insert_value + "'" + str(cell) + "',"
+ if column.type == DataType.text:
+ insert_value = insert_value + "'" + cell + "',"
+ elif column.type == DataType.timestamp:
+ insert_value = insert_value + "'" + cell.strftime('%Y-%m-%d %H:%M:%S.%f') + "',"
else:
insert_value = insert_value + str(cell) + ","
result = mindLake.datalake.query(f'INSERT INTO "{tableName}" VALUES ({insert_value[:-1]})')
diff --git a/minddatapack/utils.py b/minddatapack/utils.py
index f09d688..907ee7c 100644
--- a/minddatapack/utils.py
+++ b/minddatapack/utils.py
@@ -1,6 +1,7 @@
import mindlakesdk
from mindlakesdk.utils import ResultType, DataType
from mindlakesdk.datalake import DataLake
+from abc import ABC, abstractmethod
METADATA_EXT = '.meta.json'
CACHE_PREFIX = 'datapack_cache_'
@@ -14,4 +15,17 @@ def __init__(self, columnName: str, dataType: DataType, encrypt: bool, dataKey:
else:
self.dataKey = mindlakesdk.utils.genAESKey()
else:
- self.dataKey = None
\ No newline at end of file
+ self.dataKey = None
+
+class Connector(ABC):
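+    # interface implemented by every storage backend; DataPack.saveTo/loadFrom
+    # dispatch to the save()/load() of a concrete connector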
+ @abstractmethod
+ def __init__(self):
+ raise NotImplementedError
+
+ @abstractmethod
+ def save(self, dataPack, name: str) -> ResultType:
+ raise NotImplementedError
+
+ @abstractmethod
+ def load(self, dataPack, identifier: str) -> ResultType:
+ raise NotImplementedError
\ No newline at end of file
diff --git a/minddatapack/web3storageconnector.py b/minddatapack/web3storageconnector.py
new file mode 100644
index 0000000..5e4592b
--- /dev/null
+++ b/minddatapack/web3storageconnector.py
@@ -0,0 +1,84 @@
+from datetime import datetime
+import zipfile
+import requests
+import json
+import logging
+import os
+from mindlakesdk.utils import ResultType
+import minddatapack.utils
+
+class Web3StorageConnector(minddatapack.utils.Connector):
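+    # apiToken is issued by https://web3.storage and is required for save(); load() fetches via the public gateway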
+ def __init__(self, apiToken: str = None):
+ self.apiToken = apiToken
+
+ def save(self, dataPack: "minddatapack.DataPack", fileName: str) -> ResultType:
+ cacheZipFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.zip'
+ result = dataPack._saveToLocalFile(fileName)
+ if not result:
+ return result
+ metaFileName = fileName + minddatapack.utils.METADATA_EXT
+ try:
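+            # bundle the CSV and its .meta.json into one archive so a single CID covers both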
+ with zipfile.ZipFile(cacheZipFileName, 'w') as zip:
+ zip.write(fileName)
+ zip.write(metaFileName)
+            with open(cacheZipFileName, 'rb') as zipFile:
+                files = [
+                    ('file', (cacheZipFileName, zipFile, 'application/zip'))
+                ]
+                headers = {
+                    "Authorization": "Bearer " + self.apiToken
+                }
+                response = requests.post('https://api.web3.storage/upload', headers=headers, files=files)
+ if response and response.status_code == 200:
+ ipfsHash = json.loads(response.text)['cid']
+ return ResultType(0, "Success", ipfsHash)
+ else:
+ return ResultType(60001, "Network error", None)
+ except Exception as e:
+ logging.debug("Exception:", str(e))
+            return ResultType(60014, "Fail to connect to Web3Storage", None)
+ finally:
+ if os.path.exists(fileName):
+ os.remove(fileName)
+ if os.path.exists(metaFileName):
+ os.remove(metaFileName)
+ if os.path.exists(cacheZipFileName):
+ os.remove(cacheZipFileName)
+
+    def load(self, dataPack: "minddatapack.DataPack", ipfsCID: str) -> ResultType:
+ cacheZipFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.zip'
+ metaFileName = None
+ dataFileName = None
+ try:
+ response = requests.get("https://w3s.link/ipfs/" + ipfsCID, stream=True)
+ if response and response.status_code == 200:
+ with open(cacheZipFileName, 'wb') as file:
+ for chunk in response.iter_content(chunk_size=1024*64):
+ file.write(chunk)
+ with zipfile.ZipFile(cacheZipFileName, "r") as zip:
+ fileNames = zip.namelist()
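+                    # a valid DataPack archive holds exactly the CSV and its metadata file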
+ if len(fileNames) != 2:
+ return ResultType(60015, "Invalid DataPack data", None)
+ for fileName in fileNames:
+ if fileName.endswith(minddatapack.utils.METADATA_EXT):
+ metaFileName = fileName
+ elif fileName.endswith('.csv'):
+ dataFileName = fileName
+ if metaFileName != dataFileName + minddatapack.utils.METADATA_EXT:
+ return ResultType(60015, "Invalid DataPack data", None)
+ zip.extractall()
+ return dataPack._loadFromLocalFile(dataFileName)
+ else:
+ return ResultType(60001, "Network error", None)
+ except Exception as e:
+ logging.debug("Exception:", str(e))
+            return ResultType(60014, "Fail to connect to Web3Storage", None)
+ finally:
+ if os.path.exists(cacheZipFileName):
+ os.remove(cacheZipFileName)
+ if dataFileName and os.path.exists(dataFileName):
+ os.remove(dataFileName)
+ if metaFileName and os.path.exists(metaFileName):
+ os.remove(metaFileName)
+ if os.path.exists(ipfsCID):
+ os.rmdir(ipfsCID)
diff --git a/pyproject.toml b/pyproject.toml
index bc70519..022a2c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project]
name = "minddatapack"
-version = "v1.0.1"
+version = "v1.0.3"
authors = [
{ name="Mind Labs", email="biz@mindnetwork.xyz" },
]