diff --git a/README.md b/README.md index ab42f02..5358a1d 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,8 @@ Full doc: [https://mind-network.gitbook.io/mind-lake-sdk](https://mind-network.g * Initial Release * v1.0.1 * Add IPFS support +* v1.0.3 + * Change connectors to be class ## License diff --git a/examples/use_case_arweave.ipynb b/examples/use_case_arweave.ipynb index 461e028..c7c11ce 100644 --- a/examples/use_case_arweave.ipynb +++ b/examples/use_case_arweave.ipynb @@ -230,12 +230,12 @@ ], "source": [ "dataPack1 = DataPack(env.walletPrivateKey)\n", - "result = dataPack1.loadFromCSVFileByDefineColumn(fn_local_csv, \n", - " [\n", + "localFile1 = DataPack.LocalFileConnector(columns=[\n", " DataPack.Column('uid', DataPack.DataType.int4, False),\n", " DataPack.Column('wallet_address', DataPack.DataType.text, True),\n", " DataPack.Column('register_date', DataPack.DataType.timestamp, True)\n", " ])\n", + "result = dataPack1.loadFrom(localFile1, fn_local_csv)\n", "assert result, result.message\n", "df = pandas.DataFrame(dataPack1.data)\n", "df" @@ -272,7 +272,8 @@ "print(\"dropTable result:\", result.message)\n", "\n", "# upload from local csv into mindlake\n", - "result = dataPack1.saveToMindLake('a_new_table', mindlake)\n", + "mindLakeConnector = DataPack.MindLakeConnector(mindlake)\n", + "result = dataPack1.saveTo(mindLakeConnector, 'a_new_table')\n", "assert result, result.message\n", "print(\"a_new_table has been saved to MindLake.\")" ] @@ -362,8 +363,8 @@ ], "source": [ "dataPack2 = DataPack(env.walletPrivateKey)\n", - "# make sure if you complete Step 1, so \"a_new_table\" exists in mindlake \n", - "result = dataPack2.loadFromMindByQuery('select * from \"a_new_table\"', mindlake)\n", + "# make sure if you complete Step 1, so \"a_new_table\" exists in mindlake\n", + "result = dataPack2.loadFrom(mindLakeConnector, 'select * from \"a_new_table\"')\n", "assert result, result.message\n", "df = pandas.DataFrame(dataPack2.data)\n", "df" @@ -388,24 +389,25 @@ "text": [ "test_table_encrypted.csv has been saved to Arweave.\n", "\n", - "The Arweave ID is: sMboxrf5U9Z-FM_-woSfiVerz0Lzpa1_OCmRX09Jgn0 \n", + "The Arweave ID is: DwyQeo8B7RdoJlqWNGm1WHiKVh-ua5BmclKu9pdX29w \n", "\n", - "You can check on Arweave: https://viewblock.io/arweave/tx/sMboxrf5U9Z-FM_-woSfiVerz0Lzpa1_OCmRX09Jgn0\n", + "You can check on Arweave: https://viewblock.io/arweave/tx/DwyQeo8B7RdoJlqWNGm1WHiKVh-ua5BmclKu9pdX29w\n", "\n", - "And the content on Arweave: https://arseed.web3infra.dev/sMboxrf5U9Z-FM_-woSfiVerz0Lzpa1_OCmRX09Jgn0\n", + "And the content on Arweave: https://arseed.web3infra.dev/DwyQeo8B7RdoJlqWNGm1WHiKVh-ua5BmclKu9pdX29w\n", "\n", "The content is:\n", "uid,wallet_address,register_date\n", - "123,\\x9326bce223092712067ca9dd8d6600fd6d0a8e572261927932f0b222092249804635af229e5bfd2e8ed550fb44539c37fa5f4bc77e48499ce01ed9bec037ea7ddd,\\x992afbd2992fcc46071972b3069b44476bc2316cacbe12a5305839d60ab43dc674\n", - "124,\\x7726950c54dfd2470b9ef8929ee3c049af498bf99a56756f1824f0146fcda22df0bb392959e42a2c8639604ec301425e31844b8a7b847ae486abd29d6594c0ff7c,\\xcdf8811927b22d47aa10e2e8f367e986461d43242b9f81ff62564424d64f958235\n", - "125,\\x72ccfbbd92144aed8bd0763e30f63e68ac99f095a5fdcc3772ea4b35ead34ede8fc891e7b8e73ae5d07e25fc3b801e66bbea0a110e9047b3bccfdddeac8f7cbd51,\\xb8138ce9e9feaff5c610f424cc97dfb44d1809ad31ca830fd15039d9d3124679ac\n", + "123,\\x645039a18f2d0ecfda0049619267f6e4f658d3c8dea202cf2953eaee63894b0b3af0c159a58036cd81d29d4f07d63f75236243d1bbdfd447a90a68e901a62f4153,\\xc9bd351c80b4784cdb1e26b10a8064946eb62b1d53b2b1f42da3092bf6e2cd054f\n", + "124,\\x6e6190fd99e05a04ea63768f435b8913a350876a758869804ae6a8ca824091af8b89b821cb2f9f1a7b4bdf51e90e94d77e41dac7637bf1babcfa7f1ba94f103ff5,\\x69239ddf275ef971a8bd7051988b1f92dd784377df7b4989719322aab31eeb3cb6\n", + "125,\\x54c1c6f1bc35bba9a1903e7774ca3ba6646b2aaf0f4a4b92262f8932b92bfb8d3a1699ebbc6b391e3b19983aa7832be12d75d5bdfe128c094e61b7f0a1dbab2e2c,\\x045d590d0ef821fe691ee42e04d066338392adcaf50627e3335250e9886d1a7c02\n", "\n" ] } ], "source": [ - "result = dataPack2.saveToArweave('test_table_encrypted.csv', 'ACNH')\n", - "# result = dataPack2.saveToArweave('test_table_encrypted.csv', 'AR', './arweave_wallet.json')\n", + "arweave1 = DataPack.ArweaveConnector('ACNH', ethWalletPrivateKey=env.walletPrivateKey)\n", + "# arweave = DataPack.ArweaveConnector('AR', arweaveWalletFile='./arweave_wallet.json')\n", + "result = dataPack2.saveTo(arweave1, 'test_table_encrypted.csv')\n", "assert result, result.message\n", "print(\"test_table_encrypted.csv has been saved to Arweave.\\n\")\n", "itemID = result.data\n", @@ -504,7 +506,8 @@ ], "source": [ "dataPack3 = DataPack(env.walletPrivateKey)\n", - "result = dataPack3.loadFromArweave(itemID)\n", + "arweave2 = DataPack.ArweaveConnector()\n", + "result = dataPack3.loadFrom(arweave2, itemID)\n", "assert result, result.message\n", "print(\"You can see file on Arweave are encrypted, but datapack can decrypted it locally\")\n", "df = pandas.DataFrame(dataPack3.data)\n", @@ -561,20 +564,20 @@ " \n", " 0\n", " 123\n", - " \\x9c851ae97fd6eeb3ae2437c274274b3d773fcdee11f9...\n", - " \\x44e6bdad517e634a985702320c26770a9275c79aa9b3...\n", + " \\x3c364d7aed8d63b68701fd00feac41435ff456eb307d...\n", + " \\x35bafd055143484471c52153385b7ee5eedc58b09021...\n", " \n", " \n", " 1\n", " 124\n", - " \\xaddba1e7648b04018dabaceb918cebb436dff5e57e61...\n", - " \\xb5dac593d9436309272b5b56c16d4db24bab7d9e130a...\n", + " \\xbe1d0f00778d9e15e08672f9494a22c11ef870758572...\n", + " \\x25ceb7256dd9ab8ef563ae220c62df39cf13bf3a7d08...\n", " \n", " \n", " 2\n", " 125\n", - " \\x9afc9f24ec67f79105a340e939be6f1808f5a34495f2...\n", - " \\x755479852b673a2183e1168734e78378fcf81413c3f2...\n", + " \\xa5805d615740ead33874d7036246cc2aee7665901d3c...\n", + " \\xb541751a94f6b5128af3a5939867531ed5736b0fd7c4...\n", " \n", " \n", "\n", @@ -582,14 +585,14 @@ ], "text/plain": [ " uid wallet_address \\\n", - "0 123 \\x9c851ae97fd6eeb3ae2437c274274b3d773fcdee11f9... \n", - "1 124 \\xaddba1e7648b04018dabaceb918cebb436dff5e57e61... \n", - "2 125 \\x9afc9f24ec67f79105a340e939be6f1808f5a34495f2... \n", + "0 123 \\x3c364d7aed8d63b68701fd00feac41435ff456eb307d... \n", + "1 124 \\xbe1d0f00778d9e15e08672f9494a22c11ef870758572... \n", + "2 125 \\xa5805d615740ead33874d7036246cc2aee7665901d3c... \n", "\n", " register_date \n", - "0 \\x44e6bdad517e634a985702320c26770a9275c79aa9b3... \n", - "1 \\xb5dac593d9436309272b5b56c16d4db24bab7d9e130a... \n", - "2 \\x755479852b673a2183e1168734e78378fcf81413c3f2... " + "0 \\x35bafd055143484471c52153385b7ee5eedc58b09021... \n", + "1 \\x25ceb7256dd9ab8ef563ae220c62df39cf13bf3a7d08... \n", + "2 \\xb541751a94f6b5128af3a5939867531ed5736b0fd7c4... " ] }, "execution_count": 10, @@ -598,7 +601,8 @@ } ], "source": [ - "result = dataPack3.saveToLocalFile('test_localtable_encrypted.csv', False)\n", + "localFile2 = DataPack.LocalFileConnector()\n", + "result = dataPack3.saveTo(localFile2, 'test_localtable_encrypted.csv')\n", "assert result, result.message\n", "print(\"test_localtable_encrypted.csv has been saved to local file.\")\n", "df = pandas.read_csv('test_localtable_encrypted.csv')\n", diff --git a/examples/use_case_ipfs.ipynb b/examples/use_case_ipfs.ipynb index c427c26..30aacbe 100644 --- a/examples/use_case_ipfs.ipynb +++ b/examples/use_case_ipfs.ipynb @@ -228,12 +228,12 @@ ], "source": [ "dataPack1 = DataPack(env.walletPrivateKey)\n", - "result = dataPack1.loadFromCSVFileByDefineColumn(fn_local_csv, \n", - " [\n", + "localFile1 = DataPack.LocalFileConnector(columns=[\n", " DataPack.Column('uid', DataPack.DataType.int4, False),\n", " DataPack.Column('wallet_address', DataPack.DataType.text, True),\n", " DataPack.Column('register_date', DataPack.DataType.timestamp, True)\n", " ])\n", + "result = dataPack1.loadFrom(localFile1, fn_local_csv)\n", "assert result, result.message\n", "df = pandas.DataFrame(dataPack1.data)\n", "df" @@ -270,7 +270,8 @@ "print(\"dropTable result:\", result.message)\n", "\n", "# upload from local csv into mindlake\n", - "result = dataPack1.saveToMindLake('a_new_table', mindlake)\n", + "mindLakeConnector = DataPack.MindLakeConnector(mindlake)\n", + "result = dataPack1.saveTo(mindLakeConnector, 'a_new_table')\n", "assert result, result.message\n", "print(\"a_new_table has been saved to MindLake.\")" ] @@ -360,8 +361,8 @@ ], "source": [ "dataPack2 = DataPack(env.walletPrivateKey)\n", - "# make sure if you complete Step 1, so \"a_new_table\" exists in mindlake \n", - "result = dataPack2.loadFromMindByQuery('select * from \"a_new_table\"', mindlake)\n", + "# make sure if you complete Step 1, so \"a_new_table\" exists in mindlake\n", + "result = dataPack2.loadFrom(mindLakeConnector, 'select * from \"a_new_table\"')\n", "assert result, result.message\n", "df = pandas.DataFrame(dataPack2.data)\n", "df" @@ -386,24 +387,25 @@ "text": [ "test_table_encrypted.csv has been saved to IPFS.\n", "\n", - "The IPFS ID is: QmYxCi1BBhbch496SDVP58VesPuTnraPgu57S84b1XiteM \n", + "The IPFS ID is: QmU1nri3draq7SAHVAjSx91dEVCw1e1LGdv1NaiepGvTBm \n", "\n", - "You can check on local IPFS http service: http://127.0.0.1:8080/ipfs/QmYxCi1BBhbch496SDVP58VesPuTnraPgu57S84b1XiteM\n", + "You can check on local IPFS http service: http://127.0.0.1:8080/ipfs/QmU1nri3draq7SAHVAjSx91dEVCw1e1LGdv1NaiepGvTBm\n", "\n", - "And the content on IPFS: http://127.0.0.1:8080/ipfs/QmYxCi1BBhbch496SDVP58VesPuTnraPgu57S84b1XiteM/test_table_encrypted.csv\n", + "And the content on IPFS: http://127.0.0.1:8080/ipfs/QmU1nri3draq7SAHVAjSx91dEVCw1e1LGdv1NaiepGvTBm/test_table_encrypted.csv\n", "\n", "The content is:\n", "uid,wallet_address,register_date\n", - "123,\\x4810c8a24f2f22243d778860d9ee5180d5375d6cbd22561e568c23bce63f7516d1d2887dcb1993de0133e1d1e002c06b38b56b25d23acac51e24e740f1c6c5e163,\\xf9977c69c6a3d716e615b344f4f66953a4b3d764892a5960460aba34a6c7645943\n", - "124,\\xa7fca7f580180de2e92df9ddf9608c2442dcfa947ba23e3be37bf4764f981670016df19ca4fb3e7295479de368c45122f2cca35d619a402ac28d437cfd0188233f,\\xf06543016d08a89717d34fea65b66b4b8878ae8f411598c3ce3cbde0795a58d2c7\n", - "125,\\xd26a30809f578388dfc97a0262190094c086060a380e7b83a5f2e9bd9cd680a6e272c900921ca7c5824391e4f6bc6854e622f59c0d891152e861ff7ba62f542c40,\\xfa94a16e90f53456b9d053537fed04802514b15a3506055acbbd519e67de1d6e1c\n", + "123,\\x82148bcab7b5c8fc8f81ce8cb79b307d81bfde84e51fa87ae99d56b9985b8b9018b26eff1b96a38cf148ffe1b923b99be3908a7e6d27d57e78d769af6be82dc92d,\\x0776d01ca9ffa39ea95916108147db031025ce9e3ce7f6cba619041b9fe9c247ee\n", + "124,\\x43ad786aca3e01879ee6b7753032842658e6c24a302bf2986b70c247a36bf54724ec914002f930b13fa9de88b072502f887a930b655590090a21d0f6253ab9b5f4,\\xcf0cc270db2108338f9440a0fb810706e31a73b20284226d9bddc40649d66e43f7\n", + "125,\\xe49d212633890b86855d663d90ee71ed323b68f67a53fed837c88f037d5fdb4ef5456e596a88a10a6b3d9c347033e4c0c7e1f566017fed5c49d7546fbf76348c8d,\\xb8ec6581139a9c29ce58c85f094ec53e26a23047d6ea3c73edbdcf57500e9d5de7\n", "\n" ] } ], "source": [ "# make sure you have started IPFS service on your local machine\n", - "result = dataPack2.saveToIPFS('test_table_encrypted.csv')\n", + "ipfs1 = DataPack.IPFSConnector()\n", + "result = dataPack2.saveTo(ipfs1, 'test_table_encrypted.csv')\n", "assert result, result.message\n", "print(\"test_table_encrypted.csv has been saved to IPFS.\\n\")\n", "ipfsHash = result.data\n", @@ -502,7 +504,8 @@ ], "source": [ "dataPack3 = DataPack(env.walletPrivateKey)\n", - "result = dataPack3.loadFromIPFS(ipfsHash)\n", + "ipfs2 = DataPack.IPFSConnector()\n", + "result = dataPack3.loadFrom(ipfs2, ipfsHash)\n", "assert result, result.message\n", "print(\"You can see file on IPFS are encrypted, but datapack can decrypted it locally\")\n", "df = pandas.DataFrame(dataPack3.data)\n", @@ -559,20 +562,20 @@ " \n", " 0\n", " 123\n", - " \\x1295126db9fb5f12c77ca17a7b2230509f05ee225d15...\n", - " \\x3f7a44eb7cdd6c1ff9433b4b911a0a400af3aafdc6dd...\n", + " \\x14bb21b7b3105cac85bc84383b3b0b84bf9d0503940d...\n", + " \\x6aa4984ca3bd3682679308b2ef22cf6f12d42698fde0...\n", " \n", " \n", " 1\n", " 124\n", - " \\x3c9791f3a72caf6f8f4ed8d699c9efd63eccb60961b4...\n", - " \\x83cd814f9a8f2a1e1684a62e64d87f488517526441b3...\n", + " \\x3f87e2905e69ee36419ac126dee7eb23b1b7d3b5ba16...\n", + " \\x9b8355a538a4b2f84824f55d75a098d984145de1371f...\n", " \n", " \n", " 2\n", " 125\n", - " \\xe2137c5d87fbaa2fe39f2fea8731992ea7a194b90641...\n", - " \\x550d9b3fc3d1edd7866990620a91ac5164c9fe9e30e0...\n", + " \\x7901ba445c75ceb0016914f307707c84a26b655fe7fd...\n", + " \\x7e91fca841863ca2ae32b2b1098b5191fdba7e0e0e6a...\n", " \n", " \n", "\n", @@ -580,14 +583,14 @@ ], "text/plain": [ " uid wallet_address \\\n", - "0 123 \\x1295126db9fb5f12c77ca17a7b2230509f05ee225d15... \n", - "1 124 \\x3c9791f3a72caf6f8f4ed8d699c9efd63eccb60961b4... \n", - "2 125 \\xe2137c5d87fbaa2fe39f2fea8731992ea7a194b90641... \n", + "0 123 \\x14bb21b7b3105cac85bc84383b3b0b84bf9d0503940d... \n", + "1 124 \\x3f87e2905e69ee36419ac126dee7eb23b1b7d3b5ba16... \n", + "2 125 \\x7901ba445c75ceb0016914f307707c84a26b655fe7fd... \n", "\n", " register_date \n", - "0 \\x3f7a44eb7cdd6c1ff9433b4b911a0a400af3aafdc6dd... \n", - "1 \\x83cd814f9a8f2a1e1684a62e64d87f488517526441b3... \n", - "2 \\x550d9b3fc3d1edd7866990620a91ac5164c9fe9e30e0... " + "0 \\x6aa4984ca3bd3682679308b2ef22cf6f12d42698fde0... \n", + "1 \\x9b8355a538a4b2f84824f55d75a098d984145de1371f... \n", + "2 \\x7e91fca841863ca2ae32b2b1098b5191fdba7e0e0e6a... " ] }, "execution_count": 10, @@ -596,7 +599,8 @@ } ], "source": [ - "result = dataPack3.saveToLocalFile('test_localtable_encrypted.csv', False)\n", + "localFile2 = DataPack.LocalFileConnector()\n", + "result = dataPack3.saveTo(localFile2, 'test_localtable_encrypted.csv')\n", "assert result, result.message\n", "print(\"test_localtable_encrypted.csv has been saved to local file.\")\n", "df = pandas.read_csv('test_localtable_encrypted.csv')\n", diff --git a/examples/use_case_web3storage.ipynb b/examples/use_case_web3storage.ipynb new file mode 100644 index 0000000..236cf58 --- /dev/null +++ b/examples/use_case_web3storage.ipynb @@ -0,0 +1,629 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step 1: configuration" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Input your walletPrivateKey here:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "check env.walletPrivateKey: 2a776\n", + "check env.mindLakeAppKey: Kyunq\n", + "check env.MINDLAKE_GATEWAY: https://sdk.mindnetwork.xyz/node\n", + "check env.web3StorageToken: eyJhb\n" + ] + } + ], + "source": [ + "# make sure you configure mindlakesdk, please check env.py in the same folder\n", + "# please familar these configure, and you can read https://github.com/mind-network/mind-lake-sdk-python \n", + "# You can get web3StorageToken from https://web3.storage/\n", + "import env\n", + "\n", + "print(\"check env.walletPrivateKey:\", env.walletPrivateKey[0:5])\n", + "print(\"check env.mindLakeAppKey:\", env.mindLakeAppKey[0:5])\n", + "print(\"check env.MINDLAKE_GATEWAY:\", env.MINDLAKE_GATEWAY)\n", + "print(\"check env.web3StorageToken:\", env.web3StorageToken[0:5])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install depedancy and source code" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# please \"pip install mindlakesdk\" if not installed\n", + "import mindlakesdk\n", + "\n", + "# please \"pip install minddatapack\" if not installed, or git clone the source code\n", + "from minddatapack import DataPack\n", + "\n", + "# please \"pip install pandas\" if not installed, it is used to display structure content in the notebook\n", + "import pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# check if example_data.csv file exists in the same folder, this demo will start to load this csv file." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2. Local CSV -> MindLake\n", + "- Load data from a local CSV file without the metadata file, by defining each column manually.\n", + "- You can also load from a local backup into MindLake once you are familar, then no need to define each column. " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidwallet_addressregister_date
01230x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
11240x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
21250x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
\n", + "
" + ], + "text/plain": [ + " uid wallet_address register_date\n", + "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fn_local_csv = './example_data.csv'\n", + "\n", + "# preview local csv\n", + "df = pandas.read_csv(fn_local_csv)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012
01230x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
11240x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
21250x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
\n", + "
" + ], + "text/plain": [ + " 0 1 2\n", + "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataPack1 = DataPack(env.walletPrivateKey)\n", + "localFile1 = DataPack.LocalFileConnector(columns=[\n", + " DataPack.Column('uid', DataPack.DataType.int4, False),\n", + " DataPack.Column('wallet_address', DataPack.DataType.text, True),\n", + " DataPack.Column('register_date', DataPack.DataType.timestamp, True)\n", + " ])\n", + "result = dataPack1.loadFrom(localFile1, fn_local_csv)\n", + "assert result, result.message\n", + "df = pandas.DataFrame(dataPack1.data)\n", + "df" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Save the data into a table in MindLake" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dropTable result: Success\n", + "a_new_table has been saved to MindLake.\n" + ] + } + ], + "source": [ + "mindlake = mindlakesdk.connect(env.walletPrivateKey, env.mindLakeAppKey, env.MINDLAKE_GATEWAY)\n", + "assert mindlake, mindlake.message\n", + "\n", + "# drop the table if exists\n", + "result = mindlake.datalake.dropTable('a_new_table')\n", + "print(\"dropTable result:\", result.message)\n", + "\n", + "# upload from local csv into mindlake\n", + "mindLakeConnector = DataPack.MindLakeConnector(mindlake)\n", + "result = dataPack1.saveTo(mindLakeConnector, 'a_new_table')\n", + "assert result, result.message\n", + "print(\"a_new_table has been saved to MindLake.\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "you can also check this new downloaded table in https://scan.mindnetwork.xyz/account/myData" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3. MindLake -> Web3Storage\n", + "- Load data from a table in MindLake" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012
01230x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
11240x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
21250x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
\n", + "
" + ], + "text/plain": [ + " 0 1 2\n", + "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataPack2 = DataPack(env.walletPrivateKey)\n", + "# make sure if you complete Step 1, so \"a_new_table\" exists in mindlake\n", + "result = dataPack2.loadFrom(mindLakeConnector, 'select * from \"a_new_table\"')\n", + "assert result, result.message\n", + "df = pandas.DataFrame(dataPack2.data)\n", + "df" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Save the data into Web3Storage" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "test_table_encrypted.csv has been saved to Web3Storage.\n", + "\n", + "The IPFS CID is: bafkreigsgm7gmydxagcuo3w6jtnsm36kvdoiml3dtfjomy6ksioqe6d3n4 \n", + "\n", + "You can download the archive file from: https://w3s.link/ipfs/bafkreigsgm7gmydxagcuo3w6jtnsm36kvdoiml3dtfjomy6ksioqe6d3n4\n", + "\n", + "Here is storage info of the archive file:\n", + "{\"_id\":\"315318962443259193\",\"type\":\"Upload\",\"name\":\"Upload at 2023-09-03T09:09:01.164Z\",\"created\":\"2023-09-03T09:09:01.164+00:00\",\"updated\":\"2023-09-03T09:09:01.164+00:00\",\"cid\":\"bafkreigsgm7gmydxagcuo3w6jtnsm36kvdoiml3dtfjomy6ksioqe6d3n4\",\"dagSize\":1626,\"pins\":[{\"status\":\"Pinned\",\"updated\":\"2023-09-03T09:09:01.164+00:00\",\"peerId\":\"bafzbeibhqavlasjc7dvbiopygwncnrtvjd2xmryk5laib7zyjor6kf3avm\",\"peerName\":\"elastic-ipfs\",\"region\":null}],\"deals\":[]}\n" + ] + } + ], + "source": [ + "web3Storage1 = DataPack.Web3StorageConnector(env.web3StorageToken)\n", + "result = dataPack2.saveTo(web3Storage1, 'test_table_encrypted.csv')\n", + "assert result, result.message\n", + "print(\"test_table_encrypted.csv has been saved to Web3Storage.\\n\")\n", + "ipfsCid = result.data\n", + "print(\"The IPFS CID is:\", ipfsCid, '\\n')\n", + "ipfs_file_url = \"https://w3s.link/ipfs/%s\"%ipfsCid\n", + "ipfs_file_info_url = \"https://api.web3.storage/user/uploads/%s\"%ipfsCid\n", + "print(f\"You can download the archive file from: {ipfs_file_url}\\n\")\n", + "print(\"Here is storage info of the archive file:\")\n", + "import requests\n", + "headers = {\n", + " \"Authorization\": \"Bearer \" + env.web3StorageToken\n", + "}\n", + "content = requests.get(ipfs_file_info_url, headers=headers).text\n", + "print(content)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4. Web3Storage -> Local File\n", + "- Load data from Web3Storage and save to local file" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "You can see file on Web3Storage are encrypted, but datapack can decrypted it locally\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012
01230x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
11240x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
21250x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
\n", + "
" + ], + "text/plain": [ + " 0 1 2\n", + "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataPack3 = DataPack(env.walletPrivateKey)\n", + "web3Storage2 = DataPack.Web3StorageConnector()\n", + "result = dataPack3.loadFrom(web3Storage2, ipfsCid)\n", + "assert result, result.message\n", + "print(\"You can see file on Web3Storage are encrypted, but datapack can decrypted it locally\")\n", + "df = pandas.DataFrame(dataPack3.data)\n", + "df" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Save the data into a local file with encryption" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "test_localtable_encrypted.csv has been saved to local file.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidwallet_addressregister_date
0123\\xec58be677e5c22bac7ea266df234565fe56506aa0d5e...\\x1ef6e317b5cbfe4568fc648dad1bb9e7e8e872005071...
1124\\xa55d391505fbb845d77990d3c415dbed2d758b7d3977...\\xd460941b3d1cbb9bbd1014a02712b18a918b869af046...
2125\\xcc8e1cffefa3029ecb20518c098254aa421d8904f92a...\\xe8d4aac12f92c75cd1589b028a7ae5f3e9c47c1259f6...
\n", + "
" + ], + "text/plain": [ + " uid wallet_address \\\n", + "0 123 \\xec58be677e5c22bac7ea266df234565fe56506aa0d5e... \n", + "1 124 \\xa55d391505fbb845d77990d3c415dbed2d758b7d3977... \n", + "2 125 \\xcc8e1cffefa3029ecb20518c098254aa421d8904f92a... \n", + "\n", + " register_date \n", + "0 \\x1ef6e317b5cbfe4568fc648dad1bb9e7e8e872005071... \n", + "1 \\xd460941b3d1cbb9bbd1014a02712b18a918b869af046... \n", + "2 \\xe8d4aac12f92c75cd1589b028a7ae5f3e9c47c1259f6... " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "localFile2 = DataPack.LocalFileConnector()\n", + "result = dataPack3.saveTo(localFile2, 'test_localtable_encrypted.csv')\n", + "assert result, result.message\n", + "print(\"test_localtable_encrypted.csv has been saved to local file.\")\n", + "df = pandas.read_csv('test_localtable_encrypted.csv')\n", + "df" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "datapack-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.17" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/minddatapack/__init__.py b/minddatapack/__init__.py index 7d3414a..8de1fa3 100644 --- a/minddatapack/__init__.py +++ b/minddatapack/__init__.py @@ -4,15 +4,21 @@ from mindlakesdk.utils import ResultType, DataType from web3 import Web3 import importlib.metadata -import minddatapack.arweaveconnector +from minddatapack.arweaveconnector import ArweaveConnector +from minddatapack.ipfsconnector import IPFSConnector +from minddatapack.web3storageconnector import Web3StorageConnector import minddatapack.mindlakeconnector import minddatapack.localfileconnector -import minddatapack.ipfsconnector -from minddatapack.utils import Column +from minddatapack.utils import Column, Connector class DataPack: DataType = DataType Column = Column + IPFSConnector = IPFSConnector + Web3StorageConnector = Web3StorageConnector + ArweaveConnector = ArweaveConnector + LocalFileConnector = minddatapack.localfileconnector.LocalFileConnector + MindLakeConnector = minddatapack.mindlakeconnector.MindLakeConnector def __init__(self, walletPrivateKey: str): self.existData = False @@ -20,38 +26,35 @@ def __init__(self, walletPrivateKey: str): self.data = None self.columnName = None self.fileName = None - self.primaryKey = None - self.version = importlib.metadata.version('minddatapack') - self.__walletPrivateKey = walletPrivateKey + # self.primaryKey = None + try: + self.version = importlib.metadata.version('minddatapack') + except: + self.version = '0.0.0' web3 = Web3(Web3.HTTPProvider(mindlakesdk.settings.WEB3API)) self.__walletAccount = web3.eth.account.from_key(walletPrivateKey) - def loadFromMindByQuery(self, sqlStatement: str, mindLake: mindlakesdk.MindLake) -> ResultType: + def saveTo(self, connector: Connector, name: str) -> ResultType: + return connector.save(self, name) + + def loadFrom(self, connector: Connector, identifier: str) -> ResultType: + return connector.load(self, identifier) + + def _loadFromMindByQuery(self, sqlStatement: str, mindLake: mindlakesdk.MindLake) -> ResultType: result, self.__columns = minddatapack.mindlakeconnector.loadFromMindByQuery(self, sqlStatement, mindLake) return result - def saveToMindLake(self, tableName: str, mindLake: mindlakesdk.MindLake) -> ResultType: + def _saveToMindLake(self, tableName: str, mindLake: mindlakesdk.MindLake) -> ResultType: return minddatapack.mindlakeconnector.saveToMindLake(tableName, mindLake, self.data, self.__columns) - def saveToLocalFile(self, filePath: str, ignoreEncrypt: bool = False): + def _saveToLocalFile(self, filePath: str, ignoreEncrypt: bool = False): return minddatapack.localfileconnector.saveToLocalFile(self, filePath, ignoreEncrypt, self.__columns, self.__walletAccount) - def loadFromLocalFile(self, filePath: str): + def _loadFromLocalFile(self, filePath: str): result, self.__columns = minddatapack.localfileconnector.loadFromLocalFile(self, filePath, self.__walletAccount) return result - def loadFromCSVFileByDefineColumn(self, csvFilePath: str, columns: list): + def _loadFromCSVFileByDefineColumn(self, csvFilePath: str, columns: list): self.__columns = columns return minddatapack.localfileconnector.loadFromCSVFileByDefineColumn(self, csvFilePath, columns) - - def saveToArweave(self, fileName: str, tokenName: str, arWalletFile: str = None): - return minddatapack.arweaveconnector.saveToArweave(self, fileName, tokenName, arWalletFile, self.__walletPrivateKey) - - def loadFromArweave(self, id: str, arGateway: str = 'https://arseed.web3infra.dev/'): - return minddatapack.arweaveconnector.loadFromArweave(self, id, arGateway) - - def saveToIPFS(self, fileName: str, apiEndpoint: str = 'http://localhost:5001', apiKey: str = None, apiSecret: str = None): - return minddatapack.ipfsconnector.saveToIPFS(self, fileName, apiEndpoint, apiKey, apiSecret) - - def loadFromIPFS(self, ipfsCID: str, apiEndpoint: str = 'http://localhost:5001', apiKey: str = None, apiSecret: str = None): - return minddatapack.ipfsconnector.loadFromIPFS(self, ipfsCID, apiEndpoint, apiKey, apiSecret) + \ No newline at end of file diff --git a/minddatapack/arweaveconnector.py b/minddatapack/arweaveconnector.py index 8012f4d..30361f3 100644 --- a/minddatapack/arweaveconnector.py +++ b/minddatapack/arweaveconnector.py @@ -6,64 +6,76 @@ from mindlakesdk.utils import ResultType import minddatapack.utils -def saveToArweave(dataPack, fileName: str, tokenName: str, arWalletFile: str, ethWalletPrivateKey: str): - result = dataPack.saveToLocalFile(fileName) - if not result: - return result - dataFileName = fileName - metaFileName = fileName + minddatapack.utils.METADATA_EXT - try: - import arseeding, everpay - if arWalletFile: - signer = everpay.ARSigner(arWalletFile) +class ArweaveConnector(minddatapack.utils.Connector): + def __init__(self, tokenName: str = None, arWalletFile: str = None, ethWalletPrivateKey: str = None, arGateway: str = None): + self.tokenName = tokenName + self.arWalletFile = arWalletFile + self.ethWalletPrivateKey = ethWalletPrivateKey + if arGateway: + if arGateway[-1] != '/': + arGateway += '/' + self.arGateway = arGateway else: - signer = everpay.ETHSigner(ethWalletPrivateKey) - with open(dataFileName, 'rb') as dataFile: - with open(metaFileName, 'r') as metaFile: - data = dataFile.read() - metadata = metaFile.read() - order = arseeding.send_and_pay(signer, tokenName, data, tags= [ - {'name': 'App-Name', 'value': 'Mind-DataPack'}, - {'name': 'MindDataPackMetaData', 'value': metadata}, - {'name': 'Content-Type', 'value': 'text/csv'}]) - return ResultType(0, "Success", order['itemId']) - except Exception as e: - logging.debug("Exception:", str(e)) - return ResultType(60014, "Fail to connect to Arweave", None) - finally: - if os.path.exists(dataFileName): - os.remove(dataFileName) - if os.path.exists(metaFileName): - os.remove(metaFileName) + self.arGateway = 'https://arseed.web3infra.dev/' -def loadFromArweave(dataPack, id: str, arGateway: str): - cacheDataFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.csv' - cacheMetaFileName = cacheDataFileName + minddatapack.utils.METADATA_EXT - try: - if arGateway[-1] != '/': - arGateway += '/' - metaUrl = arGateway + 'bundle/tx/' + id - metaResponse = requests.get(metaUrl) - if metaResponse and metaResponse.status_code == 200: - txMeta = json.loads(metaResponse.text) - metadataJsonStr = txMeta['tags'][1]['value'] - with open(cacheMetaFileName, 'wb') as file: - file.write(metadataJsonStr.encode('utf-8')) - - dataUrl = arGateway + id - dataResponse = requests.get(dataUrl) - if dataResponse and dataResponse.status_code == 200: - with open(cacheDataFileName, 'wb') as file: - file.write(dataResponse.content) + def save(self, dataPack: "minddatapack.DataPack", fileName: str) -> ResultType: + result = dataPack._saveToLocalFile(fileName) + if not result: + return result + dataFileName = fileName + metaFileName = fileName + minddatapack.utils.METADATA_EXT + try: + import arseeding, everpay + if self.arWalletFile: + signer = everpay.ARSigner(self.arWalletFile) + else: + signer = everpay.ETHSigner(self.ethWalletPrivateKey) + with open(dataFileName, 'rb') as dataFile: + with open(metaFileName, 'r') as metaFile: + data = dataFile.read() + metadata = metaFile.read() + order = arseeding.send_and_pay(signer, self.tokenName, data, tags= [ + {'name': 'App-Name', 'value': 'Mind-DataPack'}, + {'name': 'MindDataPackMetaData', 'value': metadata}, + {'name': 'Content-Type', 'value': 'text/csv'}]) + return ResultType(0, "Success", order['itemId']) + except Exception as e: + logging.debug("Exception:", str(e)) + return ResultType(60014, "Fail to connect to Arweave", None) + finally: + if os.path.exists(dataFileName): + os.remove(dataFileName) + if os.path.exists(metaFileName): + os.remove(metaFileName) - return dataPack.loadFromLocalFile(cacheDataFileName) - else: - return ResultType(60001, "Network error", None) - except Exception as e: - logging.debug("Exception:", str(e)) - return ResultType(60014, "Fail to connect to Arweave", None) - finally: - if os.path.exists(cacheDataFileName): - os.remove(cacheDataFileName) - if os.path.exists(cacheMetaFileName): - os.remove(cacheMetaFileName) + def load(self, dataPack: "minddatapack.DataPack", transactionID: str) -> ResultType: + cacheDataFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.csv' + cacheMetaFileName = cacheDataFileName + minddatapack.utils.METADATA_EXT + try: + metaUrl = self.arGateway + 'bundle/tx/' + transactionID + metaResponse = requests.get(metaUrl) + if metaResponse and metaResponse.status_code == 200: + txMeta = json.loads(metaResponse.text) + for tag in txMeta['tags']: + if tag['name'] == 'MindDataPackMetaData': + metadataJsonStr = tag['value'] + with open(cacheMetaFileName, 'wb') as file: + file.write(metadataJsonStr.encode('utf-8')) + + dataUrl = self.arGateway + transactionID + dataResponse = requests.get(dataUrl) + if dataResponse and dataResponse.status_code == 200: + with open(cacheDataFileName, 'wb') as file: + file.write(dataResponse.content) + + return dataPack._loadFromLocalFile(cacheDataFileName) + else: + return ResultType(60001, "Network error", None) + except Exception as e: + logging.debug("Exception:", str(e)) + return ResultType(60014, "Fail to connect to Arweave", None) + finally: + if os.path.exists(cacheDataFileName): + os.remove(cacheDataFileName) + if os.path.exists(cacheMetaFileName): + os.remove(cacheMetaFileName) diff --git a/minddatapack/ipfsconnector.py b/minddatapack/ipfsconnector.py index 69db82b..b89b86c 100644 --- a/minddatapack/ipfsconnector.py +++ b/minddatapack/ipfsconnector.py @@ -7,79 +7,86 @@ from mindlakesdk.utils import ResultType import minddatapack.utils +class IPFSConnector(minddatapack.utils.Connector): + def __init__(self, apiEndpoint: str = None, apiKey: str = None, apiSecret: str = None): + self.apiEndpoint = apiEndpoint + if not self.apiEndpoint: + self.apiEndpoint = 'http://localhost:5001' + self.apiKey = apiKey + self.apiSecret = apiSecret -def saveToIPFS(dataPack, fileName: str, apiEndpoint: str, apiKey: str, apiSecret: str) -> ResultType: - result = dataPack.saveToLocalFile(fileName) - if not result: - return result - metaFileName = fileName + minddatapack.utils.METADATA_EXT - try: - csvFile = open(fileName, 'rb') - metaFile = open(metaFileName, 'rb') - files = {} - files[fileName] = csvFile - files[metaFileName] = metaFile - if apiKey and apiSecret: - response = requests.post(apiEndpoint + '/api/v0/add?pin=true&wrap-with-directory=true', files=files, auth=(apiKey,apiSecret)) - else: - response = requests.post(apiEndpoint + '/api/v0/add?pin=true&wrap-with-directory=true', files=files) - if response and response.status_code == 200: - folderJson = response.text.splitlines()[-1] - ipfsHash = json.loads(folderJson)['Hash'] - return ResultType(0, "Success", ipfsHash) - else: - return ResultType(60001, "Network error", None) - except Exception as e: - logging.debug("Exception:", str(e)) - return ResultType(60014, "Fail to connect to IPFS", None) - finally: - if csvFile: - csvFile.close() - if metaFile: - metaFile.close() - if os.path.exists(fileName): - os.remove(fileName) - if os.path.exists(metaFileName): - os.remove(metaFileName) + def save(self, dataPack: "minddatapack.DataPack", fileName: str) -> ResultType: + result = dataPack._saveToLocalFile(fileName) + if not result: + return result + metaFileName = fileName + minddatapack.utils.METADATA_EXT + try: + csvFile = open(fileName, 'rb') + metaFile = open(metaFileName, 'rb') + files = {} + files[fileName] = csvFile + files[metaFileName] = metaFile + if self.apiKey and self.apiSecret: + response = requests.post(self.apiEndpoint + '/api/v0/add?pin=true&wrap-with-directory=true', files=files, auth=(self.apiKey,self.apiSecret)) + else: + response = requests.post(self.apiEndpoint + '/api/v0/add?pin=true&wrap-with-directory=true', files=files) + if response and response.status_code == 200: + folderJson = response.text.splitlines()[-1] + ipfsHash = json.loads(folderJson)['Hash'] + return ResultType(0, "Success", ipfsHash) + else: + return ResultType(60001, "Network error", None) + except Exception as e: + logging.debug("Exception:", str(e)) + return ResultType(60014, "Fail to connect to IPFS", None) + finally: + if csvFile: + csvFile.close() + if metaFile: + metaFile.close() + if os.path.exists(fileName): + os.remove(fileName) + if os.path.exists(metaFileName): + os.remove(metaFileName) -def loadFromIPFS(dataPack, ipfsCID: str, apiEndpoint: str, apiKey: str, apiSecret: str): - cacheTarFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.tar.gz' - metaFileName = None - dataFileName = None - try: - if apiKey and apiSecret: - response = requests.post(apiEndpoint + f'/api/v0/get?arg={ipfsCID}&archive=true&compress=true&compression-level=6', auth=(apiKey,apiSecret)) - else: - response = requests.post(apiEndpoint + f'/api/v0/get?arg={ipfsCID}&archive=true&compress=true&compression-level=6') - if response and response.status_code == 200: - with open(cacheTarFileName, 'wb') as file: - file.write(response.content) - with tarfile.open(cacheTarFileName, "r:gz") as tar: - members = tar.getmembers() - if len(members) != 3: - return ResultType(60015, "Invalid DataPack data", None) - for member in members: - nameSplit = member.name.split('/') - if len(nameSplit) == 2: - if member.name.endswith(minddatapack.utils.METADATA_EXT): - metaFileName = member.name - elif member.name.endswith('.csv'): - dataFileName = member.name - tar.extract(member) - if metaFileName != dataFileName + minddatapack.utils.METADATA_EXT: - return ResultType(60015, "Invalid DataPack data", None) - return dataPack.loadFromLocalFile(dataFileName) - else: - return ResultType(60001, "Network error", None) - except Exception as e: - logging.debug("Exception:", str(e)) - return ResultType(60014, "Fail to connect to IPFS", None) - finally: - if os.path.exists(cacheTarFileName): - os.remove(cacheTarFileName) - if dataFileName and os.path.exists(dataFileName): - os.remove(dataFileName) - if metaFileName and os.path.exists(metaFileName): - os.remove(metaFileName) - if os.path.exists(ipfsCID): - os.rmdir(ipfsCID) + def load(self, dataPack: "minddatapack.DataPack", ipfsCID: str) -> ResultType: + cacheTarFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.tar.gz' + metaFileName = None + dataFileName = None + try: + if self.apiKey and self.apiSecret: + response = requests.post(self.apiEndpoint + f'/api/v0/get?arg={ipfsCID}&archive=true&compress=true&compression-level=6', auth=(self.apiKey,self.apiSecret)) + else: + response = requests.post(self.apiEndpoint + f'/api/v0/get?arg={ipfsCID}&archive=true&compress=true&compression-level=6') + if response and response.status_code == 200: + with open(cacheTarFileName, 'wb') as file: + file.write(response.content) + with tarfile.open(cacheTarFileName, "r:gz") as tar: + members = tar.getmembers() + if len(members) != 3: + return ResultType(60015, "Invalid DataPack data", None) + for member in members: + nameSplit = member.name.split('/') + if len(nameSplit) == 2: + if member.name.endswith(minddatapack.utils.METADATA_EXT): + metaFileName = member.name + elif member.name.endswith('.csv'): + dataFileName = member.name + tar.extract(member) + if metaFileName != dataFileName + minddatapack.utils.METADATA_EXT: + return ResultType(60015, "Invalid DataPack data", None) + return dataPack._loadFromLocalFile(dataFileName) + else: + return ResultType(60001, "Network error", None) + except Exception as e: + logging.debug("Exception:", str(e)) + return ResultType(60014, "Fail to connect to IPFS", None) + finally: + if os.path.exists(cacheTarFileName): + os.remove(cacheTarFileName) + if dataFileName and os.path.exists(dataFileName): + os.remove(dataFileName) + if metaFileName and os.path.exists(metaFileName): + os.remove(metaFileName) + if os.path.exists(ipfsCID): + os.rmdir(ipfsCID) diff --git a/minddatapack/localfileconnector.py b/minddatapack/localfileconnector.py index 1337df8..90fa568 100644 --- a/minddatapack/localfileconnector.py +++ b/minddatapack/localfileconnector.py @@ -13,6 +13,20 @@ from mindlakesdk.utils import ResultType, DataType import minddatapack.utils +class LocalFileConnector(minddatapack.utils.Connector): + def __init__(self, ignoreEncrypt: bool = False, columns: list = None): + self.ignoreEncrypt = ignoreEncrypt + self.columns = columns + + def save(self, dataPack: "minddatapack.DataPack", filePath: str) -> ResultType: + return dataPack._saveToLocalFile(filePath, self.ignoreEncrypt) + + def load(self, dataPack: "minddatapack.DataPack", filePath: str) -> ResultType: + if self.columns: + return dataPack._loadFromCSVFileByDefineColumn(filePath, self.columns) + else: + return dataPack._loadFromLocalFile(filePath) + def __encrypt(data, columnMeta: minddatapack.utils.Column) -> ResultType: try: data = __encodeByDataType(data, columnMeta.type) @@ -49,7 +63,7 @@ def __decrypt(cipher: str, columnMeta: minddatapack.utils.Column) -> ResultType: return ResultType(60013, 'Decrypt data failed') return ResultType(0, "Success", data) -def saveToLocalFile(dataPack, filePath: str, ignoreEncrypt: bool, columns: list, walletAccount): +def saveToLocalFile(dataPack: "minddatapack.DataPack", filePath: str, ignoreEncrypt: bool, columns: list, walletAccount): if not dataPack.existData: return ResultType(60006, 'No data in DataPack') dataPack.fileName = os.path.basename(filePath) @@ -102,13 +116,13 @@ def __buildMetadata(fileName: str, ignoreEncrypt: bool, fileHash: str, columns: metadata['Column'].append(columnMeta) return metadata -def loadFromLocalFile(dataPack, filePath: str, walletAccount): +def loadFromLocalFile(dataPack: "minddatapack.DataPack", filePath: str, walletAccount): # the path of the meta file should be filePath + '.meta.json' metaFilePath = filePath + minddatapack.utils.METADATA_EXT if not os.path.exists(filePath): - return ResultType(60007, 'CSV File not found') + return ResultType(60007, 'CSV File not found'), None if not os.path.exists(metaFilePath): - return ResultType(60008, 'Metadata file not found') + return ResultType(60008, 'Metadata file not found'), None with open(metaFilePath, 'r') as file: metadata = json.load(file) dataPack.fileName = metadata['FileName'] @@ -133,7 +147,7 @@ def loadFromLocalFile(dataPack, filePath: str, walletAccount): if not ignoreEncrypt and columns[index].encrypt: decryptResult = __decrypt(cell, columns[index]) if not decryptResult: - return decryptResult + return decryptResult, None rowDecoded.append(decryptResult.data) else: if columns[index].type == DataType.int4 or columns[index].type == DataType.int8: @@ -143,14 +157,19 @@ def loadFromLocalFile(dataPack, filePath: str, walletAccount): elif columns[index].type == DataType.decimal: rowDecoded.append(Decimal(cell)) elif columns[index].type == DataType.timestamp: - rowDecoded.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S.%f')) + if(len(cell) == 19): + rowDecoded.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S')) + elif(len(cell) == 26): + rowDecoded.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S.%f')) + else: + return ResultType(60015, 'Invalid timestamp format'), None else: rowDecoded.append(cell) dataPack.data.append(rowDecoded) dataPack.existData = True return ResultType(0, "Success"), columns -def loadFromCSVFileByDefineColumn(dataPack, csvFilePath: str, columns: list): +def loadFromCSVFileByDefineColumn(dataPack: "minddatapack.DataPack", csvFilePath: str, columns: list): # the whole csv file should be in plain text if not os.path.exists(csvFilePath): return ResultType(60007, 'CSV File not found') @@ -178,7 +197,12 @@ def loadFromCSVFileByDefineColumn(dataPack, csvFilePath: str, columns: list): elif columns[index].type == DataType.decimal: formattedRow.append(Decimal(cell)) elif columns[index].type == DataType.timestamp: - formattedRow.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S.%f')) + if(len(cell) == 19): + formattedRow.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S')) + elif(len(cell) == 26): + formattedRow.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S.%f')) + else: + return ResultType(60015, 'Invalid timestamp format'), None else: formattedRow.append(cell) dataPack.data.append(formattedRow) diff --git a/minddatapack/mindlakeconnector.py b/minddatapack/mindlakeconnector.py index 7d892b6..2e97320 100644 --- a/minddatapack/mindlakeconnector.py +++ b/minddatapack/mindlakeconnector.py @@ -1,7 +1,18 @@ +import datetime import mindlakesdk from mindlakesdk.utils import ResultType, DataType import minddatapack.utils +class MindLakeConnector(minddatapack.utils.Connector): + def __init__(self, mindLake: mindlakesdk.MindLake): + self.mindLake = mindLake + + def save(self, dataPack: "minddatapack.DataPack", tableName: str) -> ResultType: + return dataPack._saveToMindLake(tableName, self.mindLake) + + def load(self, dataPack: "minddatapack.DataPack", sqlStatement: str) -> ResultType: + return dataPack._loadFromMindByQuery(sqlStatement, self.mindLake) + def loadFromMindByQuery(dataPack, sqlStatement: str, mindLake: mindlakesdk.MindLake) -> ResultType: result = mindLake.datalake.queryForDataAndMeta(sqlStatement) if not result: @@ -31,6 +42,13 @@ def loadFromMindByQuery(dataPack, sqlStatement: str, mindLake: mindlakesdk.MindL if not decryptResult: return decryptResult rowResult.append(decryptResult.data) + elif columns[index].type == DataType.timestamp: + if(len(cell) == 19): + rowResult.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S')) + elif(len(cell) == 26): + rowResult.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S.%f')) + else: + return ResultType(60015, 'Invalid timestamp format'), None else: rowResult.append(cell) dataPack.data.append(rowResult) @@ -51,8 +69,10 @@ def saveToMindLake(tableName: str, mindLake: mindlakesdk.MindLake, data, columns return encryptResult insert_value = insert_value + "'" + encryptResult.data + "'," else: - if column.type == DataType.text or column.type == DataType.timestamp: - insert_value = insert_value + "'" + str(cell) + "'," + if column.type == DataType.text: + insert_value = insert_value + "'" + cell + "'," + elif column.type == DataType.timestamp: + insert_value = insert_value + "'" + cell.strftime('%Y-%m-%d %H:%M:%S.%f') + "'," else: insert_value = insert_value + str(cell) + "," result = mindLake.datalake.query(f'INSERT INTO "{tableName}" VALUES ({insert_value[:-1]})') diff --git a/minddatapack/utils.py b/minddatapack/utils.py index f09d688..907ee7c 100644 --- a/minddatapack/utils.py +++ b/minddatapack/utils.py @@ -1,6 +1,7 @@ import mindlakesdk from mindlakesdk.utils import ResultType, DataType from mindlakesdk.datalake import DataLake +from abc import ABC, abstractmethod METADATA_EXT = '.meta.json' CACHE_PREFIX = 'datapack_cache_' @@ -14,4 +15,17 @@ def __init__(self, columnName: str, dataType: DataType, encrypt: bool, dataKey: else: self.dataKey = mindlakesdk.utils.genAESKey() else: - self.dataKey = None \ No newline at end of file + self.dataKey = None + +class Connector(ABC): + @abstractmethod + def __init__(self): + raise NotImplementedError + + @abstractmethod + def save(self, dataPack, name: str) -> ResultType: + raise NotImplementedError + + @abstractmethod + def load(self, dataPack, identifier: str) -> ResultType: + raise NotImplementedError \ No newline at end of file diff --git a/minddatapack/web3storageconnector.py b/minddatapack/web3storageconnector.py new file mode 100644 index 0000000..5e4592b --- /dev/null +++ b/minddatapack/web3storageconnector.py @@ -0,0 +1,84 @@ +from datetime import datetime +import zipfile +import requests +import json +import logging +import os +from mindlakesdk.utils import ResultType +import minddatapack.utils + +class Web3StorageConnector(minddatapack.utils.Connector): + def __init__(self, apiToken: str = None): + self.apiToken = apiToken + + def save(self, dataPack: "minddatapack.DataPack", fileName: str) -> ResultType: + cacheZipFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.zip' + result = dataPack._saveToLocalFile(fileName) + if not result: + return result + metaFileName = fileName + minddatapack.utils.METADATA_EXT + try: + with zipfile.ZipFile(cacheZipFileName, 'w') as zip: + zip.write(fileName) + zip.write(metaFileName) + zipFile = open(cacheZipFileName, 'rb') + files = [ + ('file', (cacheZipFileName, zipFile, 'application/zip')) + ] + headers = { + "Authorization": "Bearer " + self.apiToken + } + response = requests.post('https://api.web3.storage/upload', headers=headers, files=files) + if response and response.status_code == 200: + ipfsHash = json.loads(response.text)['cid'] + return ResultType(0, "Success", ipfsHash) + else: + return ResultType(60001, "Network error", None) + except Exception as e: + logging.debug("Exception:", str(e)) + return ResultType(60014, "Fail to connect to IPFS", None) + finally: + if os.path.exists(fileName): + os.remove(fileName) + if os.path.exists(metaFileName): + os.remove(metaFileName) + if os.path.exists(cacheZipFileName): + os.remove(cacheZipFileName) + + def load(self, dataPack: "minddatapack.DataPack", ipfsCID: str): + cacheZipFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.zip' + metaFileName = None + dataFileName = None + try: + response = requests.get("https://w3s.link/ipfs/" + ipfsCID, stream=True) + if response and response.status_code == 200: + with open(cacheZipFileName, 'wb') as file: + for chunk in response.iter_content(chunk_size=1024*64): + file.write(chunk) + with zipfile.ZipFile(cacheZipFileName, "r") as zip: + fileNames = zip.namelist() + if len(fileNames) != 2: + return ResultType(60015, "Invalid DataPack data", None) + for fileName in fileNames: + if fileName.endswith(minddatapack.utils.METADATA_EXT): + metaFileName = fileName + elif fileName.endswith('.csv'): + dataFileName = fileName + if metaFileName != dataFileName + minddatapack.utils.METADATA_EXT: + return ResultType(60015, "Invalid DataPack data", None) + zip.extractall() + return dataPack._loadFromLocalFile(dataFileName) + else: + return ResultType(60001, "Network error", None) + except Exception as e: + logging.debug("Exception:", str(e)) + return ResultType(60014, "Fail to connect to IPFS", None) + finally: + if os.path.exists(cacheZipFileName): + os.remove(cacheZipFileName) + if dataFileName and os.path.exists(dataFileName): + os.remove(dataFileName) + if metaFileName and os.path.exists(metaFileName): + os.remove(metaFileName) + if os.path.exists(ipfsCID): + os.rmdir(ipfsCID) diff --git a/pyproject.toml b/pyproject.toml index bc70519..022a2c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "minddatapack" -version = "v1.0.1" +version = "v1.0.3" authors = [ { name="Mind Labs", email="biz@mindnetwork.xyz" }, ]