diff --git a/chipy/videos/15260.json b/chipy/videos/15260.json index 6207a8bca..640c36ba2 100644 --- a/chipy/videos/15260.json +++ b/chipy/videos/15260.json @@ -7,7 +7,6 @@ "quality_notes": null, "recorded": "2023-05-11T19:00:00", "slug": "Learning_Sprint_An_Experiment", - "source_url": "https://youtu.be/JIMSp2Vqjgc", "speakers": [ "Eve Qiao", "Ray Berg" @@ -35,6 +34,5 @@ "label": "conf", "url": "https://www.chipy.org/meetings/223/" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/chipy/videos/15261.json b/chipy/videos/15261.json index 971205cf9..ee96bf0aa 100644 --- a/chipy/videos/15261.json +++ b/chipy/videos/15261.json @@ -7,7 +7,6 @@ "quality_notes": null, "recorded": "2023-05-11T19:40:00", "slug": "Micropython_gpio", - "source_url": "https://youtu.be/6wc452U2Gzw", "speakers": [ "Andrew Wingate" ], @@ -37,6 +36,5 @@ "label": "conf", "url": "https://www.chipy.org/meetings/223/" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/chipy/videos/15262.json b/chipy/videos/15262.json index 67d6b353e..15ce729a4 100644 --- a/chipy/videos/15262.json +++ b/chipy/videos/15262.json @@ -7,7 +7,6 @@ "quality_notes": null, "recorded": "2023-05-11T20:10:00", "slug": "Ellipses_and_Arcane_Syntax", - "source_url": "https://youtu.be/fP3okTK49dI", "speakers": [ "Phil Robare" ], @@ -34,6 +33,5 @@ "label": "conf", "url": "http://www.chipy.org/" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/chipy/videos/15266.json b/chipy/videos/15266.json index a9e1a8286..6874696a0 100644 --- a/chipy/videos/15266.json +++ b/chipy/videos/15266.json @@ -7,7 +7,6 @@ "quality_notes": null, "recorded": "2023-06-08T18:52:00", "slug": "JSON_Web_Tokens_for_Fun_and_Profit", - "source_url": "https://youtu.be/gyUNW9Zkwv0", "speakers": [ "Heather White" ], @@ -34,6 +33,5 @@ "label": "conf", "url": "https://www.chipy.org/meetings/228/" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/chipy/videos/15267.json b/chipy/videos/15267.json index 4a4dad8d9..a3cebf718 100644 --- a/chipy/videos/15267.json +++ b/chipy/videos/15267.json @@ -7,7 +7,6 @@ "quality_notes": null, "recorded": "2023-06-08T19:30:59", "slug": "Exploring_the_Python_Run_Time_Environment", - "source_url": "https://youtu.be/ATSc5aLPSOc", "speakers": [ "Alexander Leopold Shon" ], @@ -34,6 +33,5 @@ "label": "conf", "url": "https://www.chipy.org/meetings/228/" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/chipy/videos/15344.json b/chipy/videos/15344.json index b06e0fdd6..c8225381c 100644 --- a/chipy/videos/15344.json +++ b/chipy/videos/15344.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2024-01-11T18:30:00", "slug": "Whats_in_your_AI_code_Learn_why_every_SCA_tool_is_wrong_and_how_to_deal_with_it", - "source_url": "https://youtu.be/HDT9K5rGvWo", "speakers": [ "Anand Sawant" ], @@ -33,6 +32,5 @@ "label": "conf", "url": "https://www.chipy.org/meetings/240/" } - ], - "veyepar_state": 8 + ] } \ No newline at end of file diff --git a/chipy/videos/15345.json b/chipy/videos/15345.json index 890832a74..87a059ee3 100644 --- a/chipy/videos/15345.json +++ b/chipy/videos/15345.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2024-01-11T19:05:00", "slug": "must_use_correc_snek_python_for_Debian_and_derivatives", - "source_url": "https://youtu.be/sHMKigxHBVA", "speakers": [ "Heather White" ], @@ -33,6 +32,5 @@ "label": "conf", "url": "https://www.chipy.org/meetings/240/" } - ], - "veyepar_state": 8 + ] } \ No newline at end of file diff --git 
a/chipy/videos/15346.json b/chipy/videos/15346.json index 4b66982d2..01527d835 100644 --- a/chipy/videos/15346.json +++ b/chipy/videos/15346.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2024-01-11T19:35:00", "slug": "BluPants_opensource_educational_Python_bots", - "source_url": "https://youtu.be/ZGXPW248azA", "speakers": [ "Marcelo Sacchetin" ], @@ -33,6 +32,5 @@ "label": "conf", "url": "https://www.chipy.org/meetings/240/" } - ], - "veyepar_state": 8 + ] } \ No newline at end of file diff --git a/chipy/videos/15352.json b/chipy/videos/15352.json index acd35da9c..442603caa 100644 --- a/chipy/videos/15352.json +++ b/chipy/videos/15352.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2024-04-11T18:30:00", "slug": "Intro_to_PropertyBased_Testing_with_Hypothesis", - "source_url": "https://youtu.be/bhRTEyGTRU0", "speakers": [ "Paul Zuradzki" ], @@ -33,6 +32,5 @@ "label": "conf", "url": "https://www.chipy.org/meetings/247/" } - ], - "veyepar_state": 5 + ] } \ No newline at end of file diff --git a/chipy/videos/15353.json b/chipy/videos/15353.json index 95e7841ba..67e4a3a85 100644 --- a/chipy/videos/15353.json +++ b/chipy/videos/15353.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2024-04-11T19:10:00", "slug": "Exploring_Cellular_Automata_in_Python_using_Golly", - "source_url": "https://youtu.be/cnG14Ue_B3w", "speakers": [ "Joshua Herman" ], @@ -33,6 +32,5 @@ "label": "conf", "url": "https://www.chipy.org/meetings/247/" } - ], - "veyepar_state": 5 + ] } \ No newline at end of file diff --git a/chipy/videos/15424.json b/chipy/videos/15424.json index 7bd124f40..edcccc711 100644 --- a/chipy/videos/15424.json +++ b/chipy/videos/15424.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2024-06-13T18:30:00", "slug": "Python_The_Language_for_Understanding_and_Building_the_Future_of_AI", - "source_url": "https://youtu.be/4f8rlX8J4_s", "speakers": [ "Paul Ebreo" ], @@ -33,6 +32,5 @@ "label": "conf", "url": "https://www.chipy.org/meetings/250/" } - ], - "veyepar_state": 6 + ] } \ No newline at end of file diff --git a/chipy/videos/15425.json b/chipy/videos/15425.json index 0f6f5a30a..489db4db0 100644 --- a/chipy/videos/15425.json +++ b/chipy/videos/15425.json @@ -33,6 +33,5 @@ "label": "conf", "url": "https://www.chipy.org/meetings/250/" } - ], - "veyepar_state": 6 + ] } \ No newline at end of file diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-apache-arrow-joris-van-den-bossche.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-apache-arrow-joris-van-den-bossche.json index 225606e34..cfe7569f6 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-apache-arrow-joris-van-den-bossche.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-apache-arrow-joris-van-den-bossche.json @@ -2,7 +2,6 @@ "description": "This talk discusses Apache Arrow project and how it already interacts\nwith the Python ecosystem.\n\nThe Apache Arrow project specifies a standardized language-independent\ncolumnar memory format for flat and nested data, organized for efficient\nanalytic operations on modern hardware. On top of that standard, it\nprovides computational libraries and zero-copy streaming messaging and\ninterprocess communication protocols, and as such, it provides a\ncross-language development platform for in-memory data. It has support\nfor many languages, including C, C++, Java, JavaScript, MATLAB, Python,\nR, Rust, ..\n\nThe Apache Arrow project, although still in active development, has\nalready several applications in the Python ecosystem. 
For example, it\nprovides the IO functionality for pandas to read the Parquet format (a\ncolumnar, binary file format used a lot in the Hadoop ecosystem). Thanks\nto the standard memory format, it can help improve interoperability\nbetween systems, and this is already seen in practice for the Spark /\nPython interface, by increasing the performance of PySpark. Further, it\nhas the potential to provide a more performant string data type and\nnested data types (like dicts or lists) for Pandas dataframes, which is\nalready being experimented with in the fletcher package (using the\npandas ExtensionArray interface).\n\nApache Arrow, defining a columnar, in-memory data format standard and\ncommunication protocols, provides a cross-language development platform\nwith already several applications in the PyData ecosystem.\n", "duration": 1789, "language": "eng", - "published_at": "2019-10-27T16:48:54.000Z", "recorded": "2019-09-04", "speakers": [ "Joris Van den Bossche" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-best-coding-practices-in-jupyterlab-alexander-cs-hendorf.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-best-coding-practices-in-jupyterlab-alexander-cs-hendorf.json index 8b2eae1bc..901922ec7 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-best-coding-practices-in-jupyterlab-alexander-cs-hendorf.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-best-coding-practices-in-jupyterlab-alexander-cs-hendorf.json @@ -2,7 +2,6 @@ "description": "| Jupyter notebooks are often a mess. The code produced is working for\n one notebook, but it's hard to maintain or to re-use.\n| In this talks I will present some best practices to make code more\n readable, better to maintain and re-usable.\n\n| This will include:\n| - versioning best practices\n| - how to use submodules\n| - coding methods to avoid (e.g. closures)\n\nJupyter notebooks are often a mess. The code produced is working for one\nnotebook, but it's hard to maintain or to re-use. In this talks I will\npresent some best practices to make code more readable, better to\nmaintain and re- usable.\n", "duration": 850, "language": "eng", - "published_at": "2019-10-27T17:38:59.000Z", "recorded": "2019-09-04", "speakers": [ "Alexander CS Hendorf" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-can-we-make-python-fast-without-sacrificing-readability-jl-cano.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-can-we-make-python-fast-without-sacrificing-readability-jl-cano.json index ee88c988a..1a12a85ad 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-can-we-make-python-fast-without-sacrificing-readability-jl-cano.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-can-we-make-python-fast-without-sacrificing-readability-jl-cano.json @@ -2,7 +2,6 @@ "description": "We are lucky there are very diverse solutions to make Python faster that\nhave been in use for a while: from wrapping compiled languages (NumPy),\nto altering the Python syntax to make it more suitable to compilers\n(Cython), to using a subset of it which can in turn be accelerated\n(numba). However, each of these options has a tradeoff, and there is no\nsilver bullet.\n\npoliastro is a library for Astrodynamics written in pure Python. 
All its\ncore algorithms are accelerated with numba, which allows poliastro to be\ndecently fast while having minimal code complexity and avoid using other\nlanguages.\n\nHowever, even though numba is quite mature as a library and most of the\nPython syntax and NumPy functions are supported, there are still some\nlimitations that affect its usage. In particular, we strive to offer a\nhigh-level API with support for physical units and reusable functions\nwhich can be passed as arguments, which sometimes require using complex\nobjects or introspective Python behavior which is not available.\n\nIn this talk we will discuss the strategies and workarounds we have\ndeveloped to overcome these problems, and what advanced numba features\nwe can leverage.\n\nThere are several solutions to make Python faster, and choosing one is\nnot easy: we would want it to be fast without sacrificing its\nreadability and high-level nature. We tried to do it for an\nAstrodynamics library using numba. How did it turn out?\n", "duration": 893, "language": "eng", - "published_at": "2020-03-06T17:39:14.000Z", "recorded": "2019-09-05", "speakers": [ "Juan Luis Cano Rodr\u00edguez" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-caterva-francesc-alterd.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-caterva-francesc-alterd.json index dbe048bee..c7e1e153b 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-caterva-francesc-alterd.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-caterva-francesc-alterd.json @@ -2,7 +2,6 @@ "description": "Caterva: A Compressed And Multidimensional Container For Big Data\n=================================================================\n\n`Caterva `__ is a C library on top of\n`C-Blosc2 `__ that implements a\nsimple multidimensional container for compressed binary data. It adds\nthe capability to store, extract, and transform data in these\ncontainers, either in-memory or on-disk.\n\nWhile there are several existing solutions for this scenario (HDF5 is\none of the most known), Caterva brings novel features that, when taken\ntoghether, set it appart from them:\n\n- **Leverage important features of C-Blosc2**. C-Blosc2 is the next\n generation of the well-know, high performance C-Blosc compression\n library (see below for a more in-depth description).\n\n- **Fast and seamless interface with the compression engine**. While in\n other solutions compression seems an after-thought and can implies\n several copies of buffers internally, the interface of Caterva and\n C-Blosc2 (its internal compression engine) is meant to be as direct\n as possible minimizing copies and hence, increasing performance.\n\n- **Both in-memory and on-disk paradigms are supported the same way**.\n This allows for using the same API for data that can be either\n in-memory or on-disk.\n\n- **Support for a plain buffer data layout**. This allows for\n essentially no-copy data sharing among existing libraries (NumPy),\n allowing to use existing functionality to be used directly in Caterva\n without loosing performance.\n\nAlong this features, there is an important 'mis-feature': Caterva is\n**type- less**. Lacking the notion of data type means that Caterva\ncontainers are not meant to be used in computations directly, but rather\nin combination with other higher-level libraries. 
While this can be seen\nas a drawback, it actually favors simplicity and leaves up to the user\nthe addition of the types that he is more interested in, which is far\nmore flexible than typed-aware libraries (HDF5, NumPy and many others).\n\nDuring our talk, we will describe all these Caterva features by using\n`cat4py `__, a Python wrapper for\nCaterva. Among the points to be discussed would be:\n\n- Introduction to the main features of Caterva.\n\n- Description of the basic data container and its usage.\n\n- Short discussion of different use cases:\n\n- Create and fill high dimensional arrays.\n\n- Get multi-dimensional slices out of the arrays.\n- How different compression codecs and filters in the pipeline affect\n store/retrieval performance.\n\nWe have been using Caterva in one of our internal projects for several\nmonths now, and we are pretty happy with the flexibility and easy-of-use\nthat it brings to us. This is why we decided to open-source it in the\nhope that it would benefit others, but also that others may help us in\ndeveloping it further ;-)\n\nAbout C-Blosc and C-Blosc2\n--------------------------\n\n`C-Blosc `__ is a high performance\ncompressor optimized for binary data. It has been designed to transmit\ndata to the processor cache faster than the traditional, non-compressed,\ndirect memory fetch approach via a memcpy() OS call. Blosc is the first\ncompressor (that we are aware of) that is meant not only to reduce the\nsize of large datasets on- disk or in-memory, but also to accelerate\nmemory-bound computations.\n\n`C-Blosc2 `__ is the new major\nversion of C-Blosc, with a revamped API and support for new compressors\nand new filters (data transformations), including filter pipelining,\nthat is, the capability to apply different filters during the\ncompression pipeline, allowing for more adaptability to the data to be\ncompressed. Dictionaries are also introduced, allowing better handling\nof redundancies among independent blocks and generally increasing\ncompression ratio and performance. Last but not least, there are new\ndata containers that are meant to overcome the 32-bit limitation of the\noriginal C-Blosc. Furthermore, the new data containers are available in\nvarious formats, including in-memory and on-disk implementations.\n\nCaterva is a library on top of the Blosc2 compressor that implements a\nsimple multidimensional container for compressed binary data. 
It adds\nthe capability to store, extract, and transform data in these\ncontainers, either in-memory or on-disk.\n", "duration": 1600, "language": "eng", - "published_at": "2019-10-27T17:07:02.000Z", "recorded": "2019-09-04", "speakers": [ "Francesc Alted" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-constrained-data-synthesis-nick-radcliffe.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-constrained-data-synthesis-nick-radcliffe.json index 86c9fe545..163cd7e57 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-constrained-data-synthesis-nick-radcliffe.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-constrained-data-synthesis-nick-radcliffe.json @@ -2,7 +2,6 @@ "description": "Synthetic data is useful in many contexts, including\n\n- providing \"safe\", non-private alternatives to data containing\n personally identifiable information\n- software and pipeline testing\n- software and service development\n- enhancing datasets for machine learning.\n\nSynthetic data is often created on a bespoke basis, and since the advent\nof generative adverserial networks (GANs) there has been considerable\ninterest and experimentation with using those as the basis for creating\nsynthetic data.\n\nWe have taken a different approach. We have worked for some years on\ndeveloping methods for automatically finding constraints that\ncharacterise data, and which can be used for testing data validity\n(so-called \"test-driven data analysis\", TDDA). Such constraints form (by\ndesign) a useful characterisation of the data from which they were\ngenerated. As a result, methods that generate datasets that match the\nconstraints necessarily construct datasets that match many of the\noriginal characteristics of the data from which the constraints were\nextracted.\n\nAn important aspect of datasets is the relationship between \"good\" (~\nvalid) and \"bad\" (~ invalid) data, both of which are typically present.\nSystems for creating useful, realistic synthetic data generally need to\nbe able to synthesize both kinds, in realistic mixtures.\n\nThis talk will discuss data synthesis from constraints, describing what\nhas been achieved so far (which includes synthesizing good and bad data)\nand future research directions.\n\nWe introduce a method for creating synthetic data \"to order\" based on\nlearned (or provided) constraints and data classifications. This\nincludes \"good\" and \"bad\" data.\n", "duration": 1482, "language": "eng", - "published_at": "2019-12-01T09:58:56.000Z", "recorded": "2019-09-04", "speakers": [ "Nick Radcliffe" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-controlling-a-confounding-effect-in-predictive-analysis-darya-chyzhyk.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-controlling-a-confounding-effect-in-predictive-analysis-darya-chyzhyk.json index 5008aa20d..9e0ff06f3 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-controlling-a-confounding-effect-in-predictive-analysis-darya-chyzhyk.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-controlling-a-confounding-effect-in-predictive-analysis-darya-chyzhyk.json @@ -2,7 +2,6 @@ "description": "For instance, when predicting the salary to offer given the descriptions\nof professional experience, the risk is to capture indirectly a gender\nbias present in the distribution of salaries. 
Another example is found\nin biomedical applications, where for an automated radiology diagnostic\nsystem to be useful, it should use more than socio-demographic\ninformation to build its prediction.\n\nHere I will talk about confounds in predictive models. I will review\nclassic deconfounding techniques developed in a well-established\nstatistical literature, and how they can be adapted to predictive\nmodeling settings. Departing from deconfounding, I will introduce a\nnon-parametric approach \u2013that we named \u201cconfound-isolating\ncross-validation\u201d\u2013 adapting cross-validation experiments to measure the\nperformance of a model independently of the confounding effect.\n\nThe examples are mentioned in this work are related to the common issues\nin neuroimage analysis, although the approach is not limited to\nneuroscience and can be useful in another domains.\n\nConfounding effects are often present in observational data: the effect\nor association studied is observed jointly with other effects that are\nnot desired.\n", "duration": 889, "language": "eng", - "published_at": "2019-12-01T13:49:34.000Z", "recorded": "2019-09-04", "speakers": [ "Darya Chyzhyk" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-dashboarding-with-jupyter-notebooks-maarten-and-martin.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-dashboarding-with-jupyter-notebooks-maarten-and-martin.json index 9ff701810..8fa0679bc 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-dashboarding-with-jupyter-notebooks-maarten-and-martin.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-dashboarding-with-jupyter-notebooks-maarten-and-martin.json @@ -2,7 +2,6 @@ "description": "| Sharing the result of a Jupyter notebook is currently not an easy\n path. With voila we are changing this. Voila is a small but important\n ingredient in the Jupyter ecosystem. Voila can execute notebooks,\n keeping the kernel connected but does not allow for arbitrary code\n execution, making it safe to share your notebooks with others.\n| With new libraries built on top of Jupyter widgets/ipywidgets\n (ipymaterialui and ipyvuetify) we allow beautiful modern React and Vue\n components to enter the Jupyter notebook. 
Using voila we can integrate\n the ipywidgets seamlessly into modern React and Vue pages, to build\n modern dashboards directly from a Jupyter notebook.\n| I will give a live example on how to transform a Jupyter notebook into\n a fully functional single page application with a modern (Material\n Design) look.\n\nTurn your Jupyter notebook into a beautiful modern React or Vue based\ndashboard using voila and Jupyter widgets.\n", "duration": 1827, "language": "eng", - "published_at": "2019-12-02T13:26:11.000Z", "recorded": "2019-09-04", "speakers": [ "Maarten Breddels", diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-data-sciences-in-a-polyglot-world-with-xtensor-and-xframe-sylvain-and-wolf.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-data-sciences-in-a-polyglot-world-with-xtensor-and-xframe-sylvain-and-wolf.json index 1e7ba6454..2304d5e73 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-data-sciences-in-a-polyglot-world-with-xtensor-and-xframe-sylvain-and-wolf.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-data-sciences-in-a-polyglot-world-with-xtensor-and-xframe-sylvain-and-wolf.json @@ -2,7 +2,6 @@ "description": "In this presentation, we demonstrate how xtensor can be used to\nimplement numerical methods very efficiently in C++, with a high-level\nnumpy-style API, and expose it to Python, Julia, and R for free. The\nresulting native extension operates in-place on Python, Julia, and R\ninfrastructures without overhead.\n\nWe then dive into the xframe package, a dataframe project for the C++\nprogramming language, exposing an API very similar to Python's xarray.\n\nFeatures of xtensor and xframe will be demonstrated using the xeus-cling\njupyter kernel, enabling interactive use of the C++ programming language\nin the notebook.\n\nThe main scientific computing programming languages have different\nmodels the main data structures of data science such as dataframes and\nn-d arrays. In this talk, we present our approach to reconcile the data\nscience tooling in this polyglot world.\n", "duration": 1583, "language": "eng", - "published_at": "2020-03-06T16:20:25.000Z", "recorded": "2019-09-05", "speakers": [ "Sylvain Corlay", diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-deep-learning-for-understanding-human-multi-modal-behavior-ricardo-manhaes.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-deep-learning-for-understanding-human-multi-modal-behavior-ricardo-manhaes.json index 2d70563e7..5c2bdcf58 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-deep-learning-for-understanding-human-multi-modal-behavior-ricardo-manhaes.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-deep-learning-for-understanding-human-multi-modal-behavior-ricardo-manhaes.json @@ -2,7 +2,6 @@ "description": "| Multimedia automatic learning has drawn attention from companies and\n governments for a significant number of applications for automated\n recommendations, classification, and human brain understatement. In\n recent years, and an increased amount of research has explored using\n deep neural networks for multimedia related tasks.\n| Some government security and surveillance applications are automated\n detections of illegal and violent behaviors, child pornography and\n traffic infractions. Companies worldwide are looking for content-based\n recommendation systems that can personalize clients consumption and\n interactions by understanding the human perception of memorability,\n interestingness, attractiveness, aesthetics. 
For these fields like\n event detection, multimedia affect and perceptual analysis are turning\n towards Artificial Neural Networks. In this talk, I will present the\n theory behind multi-modal fusion using deep learning and some open\n challenges and their state-of-the-art.\n\nMulti-modal sources of information are the next big step for AI. In this\ntalk, I will present the use of deep learning techniques for automated\nmulti-modal applications and some open benchmarks.\n", "duration": 870, "language": "eng", - "published_at": "2020-03-06T12:30:17.000Z", "recorded": "2019-09-05", "speakers": [ "Ricardo Manh\u00e3es Savii" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-deep-learning-without-a-phd-paige-bailey.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-deep-learning-without-a-phd-paige-bailey.json index 253eb8f5d..7be8313e1 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-deep-learning-without-a-phd-paige-bailey.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-deep-learning-without-a-phd-paige-bailey.json @@ -2,7 +2,6 @@ "description": "EuroSciPy 2019 Bilbao \nSeptember 5, Thursday \nMitxelena. Talk. 11.00\n\nDeep Learning without a PhD\nPaige Bailey", "duration": 1694, "language": "eng", - "published_at": "2020-03-06T12:30:17.000Z", "recorded": "2019-09-05", "speakers": [ "Paige Bailey" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-distributed-gpu-computing-with-dask-peter-entschev.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-distributed-gpu-computing-with-dask-peter-entschev.json index 63608ae03..5aa5793ba 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-distributed-gpu-computing-with-dask-peter-entschev.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-distributed-gpu-computing-with-dask-peter-entschev.json @@ -2,7 +2,6 @@ "description": "The need for speed remains important for scientific computing.\nHistorically, computers were limited to few dozens of processors, but\nwith modern GPUs, we can have thousands, or even millions of cores\nrunning in parallel on distributed systems.\n\nHowever, developing software for distributed GPU systems can be\ndifficult, both because writing GPU code can be challenging for\nnon-experts, and because distributed systems are inherently complex. We\ncan work to address these challenges by using GPU-enabled libraries that\nmimic parts of the SciPy ecosystem, such as CuPy, RAPIDS, and Numba,\nabstracting GPU programming complexity, combined with Dask to abstract\ndistributed computing complexity.\n\nWe talk about how Dask has come a long way to support distributed\nGPU-enabled systems by leveraging community standards and protocols,\nreusing open source libraries for GPU computing, and keeping it simple\nand complication-free to build highly-configurable accelerated\ndistributed software.\n\nDask has evolved over the last year to leverage multi-GPU computing\nalongside its existing CPU support. 
We present how this is possible with\nthe use of NumPy-like libraries and how to get started writing\ndistributed GPU software.\n", "duration": 1560, "language": "eng", - "published_at": "2019-10-27T17:39:18.000Z", "recorded": "2019-09-04", "speakers": [ "Peter Andreas Entschev" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-driving-a-30m-radio-telescope-with-python-francesco-pierfederici.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-driving-a-30m-radio-telescope-with-python-francesco-pierfederici.json index 0c8c47705..50e913b05 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-driving-a-30m-radio-telescope-with-python-francesco-pierfederici.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-driving-a-30m-radio-telescope-with-python-francesco-pierfederici.json @@ -1,7 +1,6 @@ { "description": "The IRAM 30m radio telescope is one of the best in the world. It has\nbeen in operation non-stop since the mid 80s and is used to observe\n24-hours a day, 365 days a year. All of the high-level telescope control\nsoftware, monitoring, data archiving as well as some of the data\nprocessing software is written in Python. This choice, controversial at\nfirst, proved to be extremely successful making the IRAM 30m telescope\nextremely efficient.\n\nThis talk will describe how Python is used at the telescope, the reasons\nbehind these choices, lessons learned and future developments.\n\nThe IRAM 30m radio telescope is one of the best in the world. The\ntelescope control software, monitoring, data archiving as well as some\nof the data processing code is written in Python. We will describe how\nand why Python is used at the telescope.\n", "duration": 1620, - "published_at": "2020-03-06T15:07:44.000Z", "recorded": "2019-09-05", "speakers": [ "Francesco Pierfederici" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-emzed-a-framework-for-analysis-of-mass-spectrometry-data-uwe-schmitt.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-emzed-a-framework-for-analysis-of-mass-spectrometry-data-uwe-schmitt.json index 40b5cd7f8..929ef3b6f 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-emzed-a-framework-for-analysis-of-mass-spectrometry-data-uwe-schmitt.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-emzed-a-framework-for-analysis-of-mass-spectrometry-data-uwe-schmitt.json @@ -2,7 +2,6 @@ "description": "Many of the existing mass spectrometry data analysis tools are desktop\napplications designed for specific applications without support for\ncustomization. 
In addition, many of the commercial solutions offer no or\nonly limited functionality for exporting results.\n\nIn addition, the existing programming libraries in this area are\nscattered across different languages, mostly R, Java and Python.\n\nAs a result, data analysis in this area often consists of manual\nimport/export steps from/to various tools and self-developed scripts\nthat prevent the reproducibility of results obtained or automated\nexecution on high-performance infrastructures.\n\nemzed tries to avoid these problems by integrating existing libraries\nand tools from Python, R (and in the near future also Java) into an\neasy-to-use API.\n\n| To support workflow development and increase confidence in end results\n| emzed also offers tools for interactive visualization of mass\n spectrometry related data structures.\n\nThe presentation introduces basics and concepts of emzed, some lessons\nlearned and current development of the next version of emzed.\n\nThis talk is about emzed, a Python library to support biologists with\nlittle programming knowledge to implement ad-hoc analyses as well as\nworkflows for mass-spectrometry data.\n", "duration": 1738, "language": "eng", - "published_at": "2020-03-06T15:07:48.000Z", "recorded": "2019-09-05", "speakers": [ "Uwe Schmitt" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-high-performance-machine-learning-with-dislib-javier-alvarez.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-high-performance-machine-learning-with-dislib-javier-alvarez.json index 136f85b54..4424c1faa 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-high-performance-machine-learning-with-dislib-javier-alvarez.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-high-performance-machine-learning-with-dislib-javier-alvarez.json @@ -2,7 +2,6 @@ "description": "PyCOMPSs is a distributed programming model and runtime for Python.\nPyCOMPSs' main goal is to make distributed computing accessible to\nnon-expert developers by providing a simple programming model, and a\nruntime that automates many aspects of the parallel execution. In\naddition to this, PyCOMPSs is infrastructure agnostic, and can run on\ntop of a wide range of platforms, from HPC clusters to clouds, and from\nGPUs to FPGAs.\n\nThis talk will present dislib, a distributed machine learning library\nbuilt on top of PyCOMPSs. Inspired by scikit-learn, dislib programming\ninterface is based on the concept of *estimators*. This provides a clean\nand easy-to-use API that highly increases the productivity of building\nlarge-scale machine learning pipelines. Thanks to PyCOMPSs, dislib can\nrun in multiple distributed platforms without changes in the source\ncode, and can handle up to billions of input samples using thousands of\nCPU cores. This makes dislib a perfect tool for scientists (and other\nusers) that are not machine learning experts, but that still want to\nextract useful knowledge from extremely large data sets.\n\nThis talk will present dislib, a distributed machine learning library\nbuilt on top of PyCOMPSs programming model. 
One of the main focuses of\ndislib is solving large-scale scientific problems on high performance\ncomputing clusters.\n", "duration": 1060, "language": "eng", - "published_at": "2020-03-06T17:30:18.000Z", "recorded": "2019-09-05", "speakers": [ "Javier \u00c1lvarez" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-high-quality-video-experience-using-deep-neural-networks-marco-and-tiberio.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-high-quality-video-experience-using-deep-neural-networks-marco-and-tiberio.json index 7d2bb2f21..b4a9153af 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-high-quality-video-experience-using-deep-neural-networks-marco-and-tiberio.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-high-quality-video-experience-using-deep-neural-networks-marco-and-tiberio.json @@ -2,7 +2,6 @@ "description": "Video compression algorithms result in a reduction of image quality,\nbecause of their lossy approach to reduce the required bandwidth. This\naffects commercial streaming services such as Netflix, or Amazon Prime\nVideo, but affects also video conferencing and video surveillance\nsystems. In all these cases it is possible to improve the video quality,\nboth for human view and for automatic video analysis, without changing\nthe compression pipeline, through a post-processing that eliminates the\nvisual artefacts created by the compression algorithms. In this\npresentation we show how deep convolutional neural networks implemented\nin Python using TensorFlow, Scikit-Learn and Scipy can be used to reduce\ncompression artefacts and reconstruct missing high frequency details\nthat were eliminated by the compression algorithm.\n\n| In particular, we follow an approach based on Generative Adversarial\n Networks, that in the scientific literature have obtained extremely\n high quality results in image enhancement tasks. However, to obtain\n these results, typically, large generators are employed, resulting in\n high computational costs and processing time, and thus the method can\n be implemented using GPUs usually available only on desktop machines.\n| In this presentation we show also an architecture that can be used to\n reduce the computational cost and that can be implemented also on\n mobile devices.\n\nA possible application is to improve video conferencing, or live\nstreaming. Since in these cases there is no original uncompressed video\nstream available, we report results using no-reference video quality\nmetric showing high naturalness and quality even for efficient networks.\n\nVideo compression algorithms used to stream videos are lossy, and when\ncompression rates increase they result in strong degradation of visual\nquality. 
We show how deep neural networks can eliminate compression\nartefacts and restore lost details.\n", "duration": 2009, "language": "eng", - "published_at": "2020-03-06T12:30:17.000Z", "recorded": "2019-09-05", "speakers": [ "Marco Bertini", diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-high-voltage-lab-common-code-basis-library-mikolaj-rybinski.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-high-voltage-lab-common-code-basis-library-mikolaj-rybinski.json index bcd41eec4..e55b51819 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-high-voltage-lab-common-code-basis-library-mikolaj-rybinski.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-high-voltage-lab-common-code-basis-library-mikolaj-rybinski.json @@ -2,7 +2,6 @@ "description": "| At the heart of ETH High Voltage Lab's (HVL) research are industrial\n devices put\n| together into code-automated experiments. It's a zoo of industrial\n communication\n| protocols one needs to handle when controlling these devices. HVL\n decided to switch from\n| MATLAB to Python as a programming and analysis tool. Python community\n provides solutions\n| to majority of technicalities involved in handling multitude of\n industrial communication\n| protocols used to control high voltage research experiment devices.\n Moreover\n| Python seems to be a more future-proof choice, meeting industry demand\n for a more\n| cost-effective and collaborative solution.\n\n| The HVL Common Code Basis library (``hvl_ccb``) provides a uniform\n user-friendly\n| object-oriented API as well as implementation for multiple of high\n voltage engineering\n| devices and their respective communication protocols. The library\n leverages Python's\n| open source community - implementations of specific communication\n protocols, but also\n| relies heavily on some of the languages newer features such as typing\n hints, dataclasses\n| or enums.\n\n| Python typing hints are used not only for their static type checking\n and autocompletion\n| support from IDEs, but also for dynamic type checking of the\n communication protocol's\n| and devices' configurations. The configurations themselves are a\n customized\n| implementation of Python's 3.7 dataclasses. Configurations properties\n rely heavily on\n| Python (advanced) enumerations.\n\n| Currently, the library supports serial port, VISA over TCP, Modbus\n TCP, LabJack LJM and\n| OPC UA communication protocols. 
These protocols are used within code\n abstraction of\n| devices such MBW973 SF6 Analyzer / dew point mirror, LabJack (T7-PRO)\n device, Schneider\n| Electric ILS2T stepper motor drive, Elektro-Automatik PSI9000 DC power\n supply, Rhode &\n| Schwarz RTO 1024 oscilloscope, or the Lab's state-of-the-art Supercube\n platform, which\n| encapsulates safety components, the voltage source, as well as other\n auxiliary devices.\n\nThe library leverages Python richness to provide a uniform user-friendly\nAPI for a zoo of industrial communication protocols used to control high\nvoltage engineering devices, together with abstraction and\nimplementations for such devices.\n", "duration": 1143, "language": "eng", - "published_at": "2019-12-02T13:27:09.000Z", "recorded": "2019-09-04", "speakers": [ "Miko\u0142aj Rybi\u0144ski" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-histogram-based-gradient-boosting-in-scikit-learn-021-olivier-grisel.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-histogram-based-gradient-boosting-in-scikit-learn-021-olivier-grisel.json index b1ea491e9..c3b7165a0 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-histogram-based-gradient-boosting-in-scikit-learn-021-olivier-grisel.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-histogram-based-gradient-boosting-in-scikit-learn-021-olivier-grisel.json @@ -2,7 +2,6 @@ "description": "scikit-learn 0.21 was recently released and this presentation will give\nan overview its main new features in general and present the new\nimplementation of Gradient Boosted Trees.\n\nGradient Boosted Trees (also known as Gradient Boosting Machines) are\nvery competitive supervised machine learning models especially on\ntabular data.\n\nScikit-learn offered a traditional implementation of this family of\nmethods for many years. However its computational performance was no\nlonger competitive and was dramatically dominated by specialized state\nof the art libraries such as XGBoost and LightGBM. The new\nimplementation in version 0.21 uses histograms of binned features to\nevaluate the tree node spit candidates. This implementation can\nefficiently leverage multi-core CPUs and is competitive with XGBoost and\nLightGBM.\n\nWe will also introduce pygbm, a numba-based implementation of gradient\nboosted trees that was used as prototype for the scikit-learn\nimplementation and compare the numba vs cython developer experience.\n\nIn this presentation we will present some recently introduced features\nof the scikit-learn Machine Learning library with a particular emphasis\non the new implementation of Gradient Boosted Trees.\n", "duration": 1614, "language": "eng", - "published_at": "2020-03-06T15:54:47.000Z", "recorded": "2019-09-05", "speakers": [ "Olivier Grisel" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-how-to-process-hyperspectral-data-matti-eskelinen.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-how-to-process-hyperspectral-data-matti-eskelinen.json index 12721cb52..84271078d 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-how-to-process-hyperspectral-data-matti-eskelinen.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-how-to-process-hyperspectral-data-matti-eskelinen.json @@ -2,7 +2,6 @@ "description": "Our lab specializes in hyperspectral imaging using a spectral imager\nthat combines tunable filters with colour sensors. Compared to simpler,\nmore established imaging systems, this results in some unique challenges\nfor the data processing. 
Especially, many of the original imaging\nparameters need to be preserved an d joined with calibration-derived\nvalues to actually compute radiance values from the raw sensor data\nsince they are not automatically handled by the hardware. Handling this\nmetadata with the resulting hyperspectral images results in combined\ndatasets of large 3-dimensional datacube, and multiple smaller 2D and 1D\narrays with linked dimensions.\n\nWe have built our solution to this problem utilizing Xarray for handling\nthe multiple arrays of data as well as the existing Dask integration for\nproviding easy parallelization for the required preprocessing. Xarray\nalso provides us many other advantages, such as:\n\n- Exploration of very complex multi-dimensional datasets (especially\n when utilizing holoviews)\n- Interoperability with the scikit ecosystem\n- Serialization to NetCDF preserving all the data in a single file\n\nHowever, our extensive and somewhat non-conventional use of Xarray does\nalso bring out it's shortcomings when trying to develop such a library\nas ours, such as indexing issues with multiple possible overlapping\ncoordinates and performance issues with complex datasets.\n\nWe present a collection of software for handling hyperspectral data\nacquisition and preprocessing fully in Python utilising Xarray for\nmetadata preservation from start to finish.\n", "duration": 986, "language": "eng", - "published_at": "2020-03-06T12:30:17.000Z", "recorded": "2019-09-05", "speakers": [ "Matti Eskelinen" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-inside-numpy-preparing-for-the-next-decade-matti-picus.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-inside-numpy-preparing-for-the-next-decade-matti-picus.json index 6dba9865c..26f657e1b 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-inside-numpy-preparing-for-the-next-decade-matti-picus.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-inside-numpy-preparing-for-the-next-decade-matti-picus.json @@ -2,7 +2,6 @@ "description": "Over the past year, and for the first time since its creation, NumPy has\nbeen operating with dedicated funding. NumPy developers think it has\ninvigorated the project and its community. But is that true, and how can\nwe know?\n\n| We will give an overview of the actions we\u2019ve taken, both successful\n and unsuccessful, to improve sustainability of the NumPy project and\n its community. We will draw some lessons from a first year of\n grant-funded activity, discuss key obstacles faced, attempt to\n quantify what we need to operate sustainably, and present a vision for\n the project and how we plan to realize it.\n| Topics we will cover include the following:\n| - Invigorating the community - what did we do, and are we correct in\n our opinion that it invigorated the community?\n| - doing things in the open as much as possible\n| - creating a roadmap\n| - NumPy Enhancement Proposal process\n| - commit rights\n| - in-person meetings\n\n- Measuring community/project health. We will use a number of published\n or proposed metrics to quantify this. 
Which ones do we think\n accurately represent the state of the project?\n- Lessons from the first grant and introducing paid work into a\n previously fully volunteer-driven project.\n- What is the best profile for a salaried employee?\n\n - Social profile\n - From inside or outside?\n\n- Have we succeeded in encouragin diversity?\n\n- A vision for future sustainabity\n\n- Models for obtaining and funneling funding\n\nOver the past year, and for the first time since its creation, NumPy has\nbeen operating with dedicated funding. NumPy developers think it has\ninvigorated the project and its community. But is that true, and how can\nwe know?\n", "duration": 1784, "language": "eng", - "published_at": "2020-03-06T12:30:17.000Z", "recorded": "2019-09-05", "speakers": [ "Matti Picus" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-jupyterlab-debugger-jeremy-tuloup.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-jupyterlab-debugger-jeremy-tuloup.json index b34395223..ba8c417cd 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-jupyterlab-debugger-jeremy-tuloup.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-jupyterlab-debugger-jeremy-tuloup.json @@ -2,7 +2,6 @@ "description": "EuroSciPy 2019 Bilbao \nSeptember 4, Wednesday \nBaroja Track. Talk. 15.45\n\nJupyterLab debugger\nJeremy Tuloup\n\nA talk about debugging jupyterLab.", "duration": 772, "language": "eng", - "published_at": "2019-12-01T14:09:03.000Z", "recorded": "2019-09-04", "speakers": [ "Jeremy Tuloup" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-keynote-talk-from-galaxies-to-brains-samuel-farrens.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-keynote-talk-from-galaxies-to-brains-samuel-farrens.json index d87a2e1db..a711e4166 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-keynote-talk-from-galaxies-to-brains-samuel-farrens.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-keynote-talk-from-galaxies-to-brains-samuel-farrens.json @@ -2,7 +2,6 @@ "description": "Programme\n~~~~~~~~~\n\n- The tutorial will begin with short introduction to the basic premise\n of sparsity and highlight some problems in astronomical image\n processing that can be solved using this methodology. (~15-20min;\n slides)\n- Tutees will then follow a hands-on demonstration of how the concept\n of sparsity can be used to denoise signals. (~30-35min; interactive\n jupyter notebook with exercises)\n- Finally the tutees will learn how to denoise an astronomical image\n and use their newfound skills to recover a nice picture of Saturn.\n (~35-40min; interactive jupyter notebook with an exercise)\n\nRequirements\n~~~~~~~~~~~~\n\n- The tutorial contents are available on\n `GitHub `__.\n- Provided tutees have a stable internet connection, the entire\n tutorial can be run online using\n `Binder `__.\n- However, to be safe, tutees should download and install the tutorial\n materials beforehand.\n\nThis tutorial will introduce the concept of *sparsity* and demonstrate\nhow it can be used to remove noise from signals. 
These concepts will\nthen be expanded to demonstrate how noise can be removed from\nastronomical images in particular.\n", "duration": 2827, "language": "eng", - "published_at": "2019-10-16T11:52:10.000Z", "recorded": "2019-09-03", "speakers": [ "Samuel FARRENS" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-keynote-talk-hpc-and-python-david-liu.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-keynote-talk-hpc-and-python-david-liu.json index a721833b0..970abb691 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-keynote-talk-hpc-and-python-david-liu.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-keynote-talk-hpc-and-python-david-liu.json @@ -2,7 +2,6 @@ "description": "EuroSciPy 2019 Bilbao \nSeptember 5, Thursday \nMitxelena. Keynote Talk. 9.15\n\nHPC and Python: Intel\u2019s work in enabling the scientific computing community\nDavid Liu", "duration": 2265, "language": "eng", - "published_at": "2020-03-06T10:30:31.000Z", "recorded": "2019-09-05", "speakers": [ "David Liu" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-keynote-talk-in-the-shadow-of-the-black-hole-sara-issaoun.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-keynote-talk-in-the-shadow-of-the-black-hole-sara-issaoun.json index 764681c5a..dec076ce9 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-keynote-talk-in-the-shadow-of-the-black-hole-sara-issaoun.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-keynote-talk-in-the-shadow-of-the-black-hole-sara-issaoun.json @@ -2,7 +2,6 @@ "description": "EuroSciPy 2019 Bilbao \nSeptember 5, Thursday \nMitxelena. Keynote Talk. 14.00\n\nIn the Shadow of the Black Hole\nSara Issaoun", "duration": 2649, "language": "eng", - "published_at": "2020-03-06T09:08:47.000Z", "recorded": "2019-09-05", "speakers": [ "Sara Issaoun" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-kubeflow-kale-from-jupyter-notebook-to-complex-pipelines-valerio-maggio.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-kubeflow-kale-from-jupyter-notebook-to-complex-pipelines-valerio-maggio.json index a9778b78a..9436ea97c 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-kubeflow-kale-from-jupyter-notebook-to-complex-pipelines-valerio-maggio.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-kubeflow-kale-from-jupyter-notebook-to-complex-pipelines-valerio-maggio.json @@ -2,7 +2,6 @@ "description": "In this talk I will present a new solution to automatically scale\nJupyter notebooks to complex and reproducibility pipelines based on\nKubernetes and KubeFlow.\n\nNowadays, most of the High Performance Computing (HPC) tasks are carried\nout in the Cloud, and this is as much as in industry as in research.\n\nMain advantages provided by the adoption of Cloud services include (a)\nconstant up-to-date hardware resources; (b) automated infrastructure\nsetup; (c) simplified resource management. Therefore, new solutions have\nbeen recently released to the community (e.g. *Kubernetes* by Google)\nproviding custom integrations to specifically support the migration of\nexisting Machine/Deep Learning pipelines to the Cloud.\n\nHowever, a shift towards a complete Cloud-based computational paradigm\nimposes new challenges in terms of data and model reproducibility,\nprivacy, accountability, and (efficient) resource configuration and\nmonitoring. Moreover, the adoption of these technologies still imposes\nadditional workloads requiring significant software and system\nengineering expertise (e.g. 
set up of containerised environments,\nstorage volumes, clusters nodes).\n\nIn this talk, I will present **kale** (``/\u02c8ke\u026ali\u02d0/``) - a new Python\nsolution to ease and support ML workloads for HPC in the Cloud is\npresented.\n\nKale leverages on the combination of Jupyter ``notebooks``, and\n*Kubernetes/Kubeflow Pipelines* (``KFP``) as core components in order\nto:\n\n- (``R1``) automate the setup and deployment procedures by automating\n the creation of (distributed) computation environments in the Cloud;\n\n- (``R2``) democratise the execution of machine learning models at\n scale by instrumented and reusable environments;\n\n- (``R3``) provide a simple interface (UI, and SDK) to enable\n researchers to deploy ML models without requiring extensive\n engineering expertise.\n\nTechnical features of Kale as well as open challenges and future\ndevelopment will be presented, along with working examples integrating\n``kale`` with the complete ML/DL workflows for pipeline reproducibility.\n\nDomains:\n^^^^^^^^\n\n- Jupyter\n- Machine Learning\n- DevOps\n- Parallel Computing/HPC\n\nGitHub:\n^^^^^^^\n\nhttps://github.com/orgs/kubeflow-kale\n", "duration": 1816, "language": "eng", - "published_at": "2020-03-06T15:07:44.000Z", "recorded": "2019-09-05", "speakers": [ "Valerio Maggio" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-last-moments-good-bye-euroscipy-2019.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-last-moments-good-bye-euroscipy-2019.json index 7f6237625..583700182 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-last-moments-good-bye-euroscipy-2019.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-last-moments-good-bye-euroscipy-2019.json @@ -2,7 +2,6 @@ "description": "EuroSciPy 2019 Bilbao \nSeptember 5, Thursday \nMitxelena. Last Moments. 18.15\n\nClosing notes, raffles, thanks to attendees, sponsors and organizer and big applause to the volunteers.", "duration": 868, "language": "eng", - "published_at": "2020-03-06T10:30:34.000Z", "recorded": "2019-09-06", "speakers": [ "Various speakers" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-lessons-learned-from-comparing-numba-cuda-and-c-cuda-lena-oden.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-lessons-learned-from-comparing-numba-cuda-and-c-cuda-lena-oden.json index 733334c81..8645041c0 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-lessons-learned-from-comparing-numba-cuda-and-c-cuda-lena-oden.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-lessons-learned-from-comparing-numba-cuda-and-c-cuda-lena-oden.json @@ -2,7 +2,6 @@ "description": "Numba allows the development of GPU code in Python style. When a Python\nscript using Numba is executed, the code is compiled just-in-time (JIT)\nusing the LLVM framework. Using Python for GPU programming can mean a\nconsiderable simplification in the development of parallel applications\ncompared to C and C-CUDA.\n\n| Python, however, has to live with the prejudice of low performance,\n especially in HighPerformance Computing.\n| We wanted to get to the bottom of whether this is really true and\n where these differences come from. For this reason, we first analyzed\n the performance of typical micro benchmarks used in HPC. By analyzing\n the assembly codes, we learned a lot about the difference between\n codes produced by C-CUDA and NUMBA- CUDA. Some of these insights have\n helped us to improve the performance of our application - and also of\n Numba-CUDA. 
With a few tricks it is possible to achieve very good\n performance with our Numba-Codes, which are very close - or sometimes\n even better than the C-CUDA versions.\n\nWe compared the performance of GPU-Applications written in C-CUDA and\nNumba- CUDA. By analyzing the GPU assembly code, we learned about the\nreasons for the differences. This helped us to optimize our codes\nwritten in NUMBA-CUDA and NUMBA itself.\n", "duration": 765, "language": "eng", - "published_at": "2019-10-27T17:39:09.000Z", "recorded": "2019-09-04", "speakers": [ "Lena Oden" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-lightning-talks-thursday.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-lightning-talks-thursday.json index 4141b0b6b..661beb176 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-lightning-talks-thursday.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-lightning-talks-thursday.json @@ -2,7 +2,6 @@ "description": "EuroSciPy 2019 Bilbao \nSeptember 5, Thursday \nMitxelena. Lightning Talks. 17.00\n\nThursday's lightning talks session", "duration": 4520, "language": "eng", - "published_at": "2020-03-06T09:09:44.000Z", "recorded": "2019-09-05", "speakers": [ "Various speakers" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-make-your-python-code-fly-pierre-augier.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-make-your-python-code-fly-pierre-augier.json index a7be865cd..f563c6a49 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-make-your-python-code-fly-pierre-augier.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-make-your-python-code-fly-pierre-augier.json @@ -2,7 +2,6 @@ "description": "Slides available at https://tiny.cc/euroscipy2019-transonic\n\n`Transonic `__ is a pure Python\npackage (requiring Python >= 3.6) to easily accelerate modern\nPython-Numpy code with different accelerators (like Cython,\n`Pythran `__, Numba,\nCupy, etc...) opportunistically (i.e. if/when they are available).\n\nWe will first present the context of the creation of this package, i.e.\nthe Python's High Performance Computing (HPC) Landscape. We will show\nhow Transonic can be used to write elegant and very efficient HPC codes\nwith Python, with examples taken from real-life research simulation\ncodes (`fluidfft `__ and\n`fluidsim `__). We will discuss the\nadvantages of using Transonic instead of writing big Cython extensions\nor using Numba or Pythran directly.\n\nA strategy to quickly develop a very efficient scientific\napplication/library with Python and Transonic could be:\n\n1. Use modern Python coding, standard Numpy/Scipy for the computations\n and all the cool libraries you want.\n\n2. Profile your applications on real cases, detect the bottlenecks and\n apply standard optimizations with Numpy.\n\n3. 
\n\nWe won't forget to also discuss some limitations of Transonic, and more\ngenerally of Python and its numerical ecosystem for High Performance\nComputing.\n\n`Transonic `__ is a new pure Python\npackage to easily accelerate modern Python-Numpy code with different\naccelerators (like Cython, Pythran, Numba, Cupy, etc.).\n", "duration": 1255, "language": "eng", - "published_at": "2019-12-02T12:46:28.000Z", "recorded": "2019-09-04", "speakers": [ "Pierre Augier" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-matrix-calculus-with-sympy-francesco-bonazzi.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-matrix-calculus-with-sympy-francesco-bonazzi.json index 20902e7a3..2d18e9284 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-matrix-calculus-with-sympy-francesco-bonazzi.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-matrix-calculus-with-sympy-francesco-bonazzi.json @@ -2,7 +2,6 @@ "description": "The recent popularization of libraries relying on tensor algebra\noperations has led to a rise in the requirement of computational tools\nto calculate the gradient and Hessian of tensorial expressions. The\nderivative of a tensor *A* by tensor *B* is the tensor containing all\ncombinations of the elements of *A* derived by the elements of *B*.\nWhile tensor derivative operations are commonly supported by most\ncomputer algebra systems and frameworks through iterative algorithms,\nthese derivatives can be expressed mathematically in closed-form\nsolutions, which are computationally many orders of magnitude faster.\n\nSymPy has recently been extended in order to support the computation of\nsymbolic matrix derivatives, and is currently the only computer algebra\nsystem endowed with this feature (lacking even in Wolfram Mathematica).\nMatrix calculus indeed plays a central role in optimization and machine\nlearning, but was unfortunately often limited to pen on paper or chalk\non blackboards.\n\nIn this talk, we will introduce matrix expressions in SymPy, and address\nthe three ways they can be represented:\n\n1. explicit matrices with symbolic entries,\n2. indexed symbols with proper summation convention,\n3. implicit matrix expressions (see the sketch below).
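A minimal sketch of the third representation (hedged: it assumes a recent SymPy version with the matrix-derivative support described here)::

    from sympy import MatrixSymbol, Trace

    X = MatrixSymbol("X", 3, 3)   # implicit matrix expression: no entries spelled out
    expr = Trace(X.T * X)         # a scalar function of the matrix X
    print(expr.diff(X))           # closed-form matrix derivative, expected: 2*X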
\n\nWe illustrate the way matrix derivatives are implemented for all three\nrepresentations, with special emphasis on the third one, the fastest and\nmost elegant. The derived expressions can then be passed to SymPy's code\ngeneration utilities and the resulting code can be compared in speed\nwith other frameworks, such as TensorFlow.\n\nThe support of matrix derivatives can turn SymPy into a simple tool to\ncreate the code for optimization algorithms or the code to train machine\nlearning algorithms. The code generation utilities of SymPy are indeed\naware of how to export matrix expressions into other programming\nlanguages and frameworks. We will give some examples using maximum\nlikelihood estimation and the expectation-maximization algorithms.\n\nIn this talk we explore a recent addition to SymPy which allows finding\nclosed-form solutions to matrix derivatives. As a consequence,\ngeneration of efficient code for optimization problems is now much\neasier.\n", "duration": 1694, "language": "eng", - "published_at": "2020-03-06T15:07:44.000Z", "recorded": "2019-09-05", "speakers": [ "Francesco Bonazzi" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-modern-data-science-maarten-and-jovan.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-modern-data-science-maarten-and-jovan.json index e80a98569..a51c33757 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-modern-data-science-maarten-and-jovan.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-modern-data-science-maarten-and-jovan.json @@ -2,7 +2,6 @@ "description": "Working with datasets comprising millions or billions of samples is an\nincreasingly common task, one that is typically tackled with distributed\ncomputing. Nodes in high-performance computing clusters have enough RAM\nto run intensive and well-tested data analysis workflows. More often\nthan not, however, this is preceded by the scientific process of\ncleaning, filtering, grouping, and other transformations of the data,\nthrough continuous visualizations and correlation analysis. In today\u2019s\nwork environments, many data scientists prefer to do this on their\nlaptops or workstations, so as to use their time more effectively and not\nrely on a spotty internet connection to access their remote data and\ncomputation resources. Modern laptops have sufficiently fast I/O SSD\nstorage, but upgrading RAM is expensive or impossible.\n\nApplying the combined benefits of computational graphs, which are common\nin neural network libraries, with delayed (a.k.a. lazy) evaluations to a\nDataFrame library enables efficient memory and CPU usage. Together with\nmemory-mapped storage (Apache Arrow, hdf5) and out-of-core algorithms,\nwe can process considerably larger data sets with fewer resources. As an\nadded bonus, the computational graphs \u2018remember\u2019 all operations applied\nto a DataFrame, meaning that data processing pipelines can be generated\nautomatically.\n\nIn this talk, we will demonstrate Vaex, an open-source DataFrame library\nthat embodies these concepts. Using data from the New York City\nYellowCab taxi service comprising 1.1 billion samples and taking up over\n170 GB on disk, we will showcase how one can conduct an exploratory data\nanalysis, complete with filtering, grouping, calculations of statistics\nand interactive visualisations, on a single laptop in real time. Finally,\nwe will show an example of how one can automatically build a machine\nlearning pipeline as a by-product of the exploratory data analysis using\nthe computational graphs in Vaex.\n\nWe will demonstrate how to explore and analyse massive datasets (>150GB)\non a laptop with the Vaex library in Python.
Using computational graphs,\nefficient algorithms and storage (Apache Arrow / hdf5), Vaex can easily\nhandle up to a billion rows.\n", "duration": 2166, "language": "eng", - "published_at": "2019-10-27T17:49:55.000Z", "recorded": "2019-09-04", "speakers": [ "Jovan Veljanoski", diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-modin-scaling-the-capabilities-of-the-data-scientist-devin-petersohn.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-modin-scaling-the-capabilities-of-the-data-scientist-devin-petersohn.json index a74a6e5fc..bfe8ea675 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-modin-scaling-the-capabilities-of-the-data-scientist-devin-petersohn.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-modin-scaling-the-capabilities-of-the-data-scientist-devin-petersohn.json @@ -2,7 +2,6 @@ "description": "Modern data systems tend to heavily focus on optimizing for the system\u2019s\ntime. Some of these optimizations, however, are counterproductive to the\nend user\u2019s workflow and thought process. In this talk, we discuss the\ndesign of Modin, a DataFrame library, and how to optimize for the human\nsystem.\n\nModin is a project at UC Berkeley's RISELab designed to optimize for the\ndata scientist\u2019s time. Often when building a data system, the system\ndesigners will follow a set of \u201cbest practices\u201d in order to optimize\nperformance. These \u201cbest practices\u201d often require data scientists to\nunderstand and personally optimize concepts and system components that\nare not central to extracting value from their data.\n\nThe fundamental goal of data science is to extract value from data.\nDespite this, data systems are being built with user requirements such\nas: (1) knowledge of partitioning, (2) understanding laziness and what\ntriggers computation, (3) an entirely new API, and (4) where their code\nis running (e.g. locally, on-prem cluster, cloud). This overhead is\npassed to the data scientist, even though there is no overlap between\nthese new requirements and the fundamental goal of their profession.\n\nIn this talk, we will discuss how we think about the problem of large\nscale data science and optimizing for the human system. We will discuss\nthe system design of Modin, which enables pluggable backends, runtimes,\nand APIs. The system is designed to solve the needs of the data science\ncommunity regardless of an individual user\u2019s environment. Currently,\nModin supports the pandas API, and a proof of concept for SQL has been\nimplemented. Modin is completely open-source and can be found on\nGitHub: https://github.com/modin-project/modin.
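A minimal sketch of Modin's documented drop-in usage (the file and column names are illustrative)::

    import modin.pandas as pd    # only the import changes relative to pandas

    df = pd.read_csv("taxi.csv")                  # reading is partitioned in parallel
    print(df.groupby("passenger_count").count())  # same pandas API, all cores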
\n\nModern data systems tend to heavily focus on optimizing for the system\u2019s\ntime. In this talk, we discuss the design of Modin, a DataFrame library,\nand how to optimize for the human system.\n", "duration": 1039, "language": "eng", - "published_at": "2019-10-27T17:37:56.000Z", "recorded": "2019-09-04", "speakers": [ "Devin Petersohn" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-open-source-projects-updates.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-open-source-projects-updates.json index 9f8b4e7be..a66b53557 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-open-source-projects-updates.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-open-source-projects-updates.json @@ -2,7 +2,6 @@ "description": "EuroSciPy 2019 Bilbao \nSeptember 4, Wednesday \nMitxelena. Main Talks. 14.00\n\nOpen source project updates", "duration": 2651, "language": "eng", - "published_at": "2019-10-27T18:42:03.000Z", "recorded": "2019-09-04", "speakers": [ "Various speakers" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-probabilistic-data-structures-in-data-intensive-applications-andrii-gakhov.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-probabilistic-data-structures-in-data-intensive-applications-andrii-gakhov.json index 78a915c53..d3721c939 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-probabilistic-data-structures-in-data-intensive-applications-andrii-gakhov.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-probabilistic-data-structures-in-data-intensive-applications-andrii-gakhov.json @@ -2,7 +2,6 @@ "description": "*Nowadays, research in every scientific domain, from medicine to\nastronomy, is impossible without processing huge amounts of data to\ncheck hypotheses, find new relations, and make discoveries. However, the\ntraditional technologies, which include data structures and algorithms,\nbecome ineffective or require too many resources. This creates a demand\nfor various optimization techniques, new data processing paradigms, and,\nfinally, appropriate algorithms.*\n\nThe presentation is dedicated to *probabilistic data structures*, a\ncommon name for advanced data structures based mostly on different\nhashing techniques. Unlike classical ones, these provide approximate\nanswers but with reliable ways to estimate possible errors and\nuncertainty. They are designed for extremely low memory requirements,\nconstant query time, and scaling, factors that are essential for\ndata applications. It is hard to imagine a field that requires learning\nfrom data where they cannot be applied.\n\nThey are not necessarily new. Probably everybody knows the Bloom\nfilter, designed in the 1970s: it efficiently solves the\nproblem of performing membership queries (deciding whether some\nelement belongs to the dataset or not) in constant time, without\nany requirement to store all elements. This is an example of a\nprobabilistic data structure, but many more have been\ndesigned for various tasks in many domains.
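A purely illustrative Bloom filter in plain Python (not the PDSA implementation) to make the membership-query idea concrete::

    import hashlib

    class BloomFilter:
        def __init__(self, size=1024, num_hashes=3):
            self.size, self.num_hashes = size, num_hashes
            self.bits = bytearray(size)      # fixed memory; elements are never stored

        def _positions(self, item):
            for seed in range(self.num_hashes):
                digest = hashlib.sha256(f"{seed}:{item}".encode()).hexdigest()
                yield int(digest, 16) % self.size

        def add(self, item):
            for pos in self._positions(item):
                self.bits[pos] = 1

        def __contains__(self, item):
            # no false negatives; false positives at a rate set by size/num_hashes
            return all(self.bits[pos] for pos in self._positions(item))

    bf = BloomFilter()
    bf.add("euroscipy")
    assert "euroscipy" in bf                 # constant-time membership query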
\n\nIn this talk, I explain **the five most important problems in data\nprocessing** that occurred in different domains but **can be efficiently\nsolved with probabilistic data structures and algorithms**. We cover\n*membership querying*, *counting* of unique elements, *frequency* and\n*rank* estimation in data streams, and *similarity*.\n\nEverybody interested in such a topic is welcome to participate in\ncontributing a free and open-source Python (Cython) library called\n`PDSA `__.\n\nWe interact with an increasing amount of data but classical data\nstructures and algorithms can't fit our requirements anymore. This talk\npresents probabilistic algorithms and data structures and\ndescribes the main areas of their application.\n", "duration": 1689, "language": "eng", - "published_at": "2020-03-06T15:07:43.000Z", "recorded": "2019-09-05", "speakers": [ "Andrii Gakhov" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-psydac-a-parallel-finite-element-solver-yaman-guclu.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-psydac-a-parallel-finite-element-solver-yaman-guclu.json index d33e57e25..73e4c1e97 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-psydac-a-parallel-finite-element-solver-yaman-guclu.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-psydac-a-parallel-finite-element-solver-yaman-guclu.json @@ -2,7 +2,6 @@ "description": "PSYDAC is a Python 3 library for the solution of partial differential\nequations. Its current focus is on isogeometric analysis using B-spline\nfinite elements, but extensions to other methodologies are under\nconsideration. In order to use PSYDAC, the user defines geometry and\nmodel equations in an abstract form using SymPDE, an extension of SymPy\nthat provides the mathematical expressions and checks their semantic\nvalidity. Once a finite element discretization has been chosen, PSYDAC\nmaps the abstract concepts into concrete objects, the basic building\nblocks being MPI-distributed vectors and matrices. Python code is\ngenerated for all the computationally intensive operations (matrix and\nvector assembly, matrix-vector products, etc.), and it is accelerated\nusing either Numba, Pythran, or Pyccel. We present the library design,\nthe user interface, and the performance results.\n\nPSYDAC takes input from SymPDE (a SymPy extension for partial\ndifferential equations), applies a finite-element discretization,\ngenerates MPI-parallel code, and accelerates it with Numba, Pythran, or\nPyccel. We present design, usage and performance.\n", "duration": 969, "language": "eng", - "published_at": "2020-03-06T17:38:47.000Z", "recorded": "2019-09-05", "speakers": [ "Yaman G\u00fc\u00e7l\u00fc" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-pyfeti-guilherme-jenovencio.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-pyfeti-guilherme-jenovencio.json index df135e435..d2cc7c41f 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-pyfeti-guilherme-jenovencio.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-pyfeti-guilherme-jenovencio.json @@ -2,7 +2,6 @@ "description": "PyFETI is a Python implementation of\nFinite-Element-Tearing-Interconnecting methods. The library provides a\nmassively parallel linear solver that uses domain decomposition\ntechniques. FETI methods rely on the solution of a linear system based\non two linear-solver strategies: direct and iterative. A big problem is\ndecomposed into subdomains, generating an additional set of constraints at\nthe interface among subdomains. The local problem solution is formulated\nin terms of a new force at the interface that must connect the\nsubdomains. Given an interface force, the local problems are\nsolved with a direct solver, e.g. SuperLU, and the update of the\ninterface force is performed by a Preconditioned Conjugate Projected\nGradient method. The library has been tested on large linear elastic problems\nat the IT4I supercomputer center.\n\nPyFETI is a Python implementation of\nFinite-Element-Tearing-Interconnecting methods.
The library provides a\nmassively parallel linear solver using domain decomposition methods, where problems\nare solved locally by a direct solver and iteratively at the interface.\n", "duration": 1075, "language": "eng", - "published_at": "2019-12-02T13:27:12.000Z", "recorded": "2019-09-04", "speakers": [ "Guilherme Jenovencio" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-pypy-meets-scipy-ronan-lamy.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-pypy-meets-scipy-ronan-lamy.json index 8e8b12ae9..0e13e5fe3 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-pypy-meets-scipy-ronan-lamy.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-pypy-meets-scipy-ronan-lamy.json @@ -2,7 +2,6 @@ "description": "PyPy is a fast and compliant implementation of Python. In other words,\nit's an interpreter for the Python language that can act as a full\nreplacement for the reference interpreter, CPython. It's optimised to\nenable efficient just-in-time compilation of Python code to machine\ncode, and has releases matching versions 2.7 and 3.6. It now also\nsupports the main pillars of the scientific ecosystem (numpy, Cython,\nscipy, pandas, ...) thanks to its emulation layer for the C API of\nCPython.\n\nPerformance is a major concern for Python programmers. When using\nCPython, this leads to splitting out the performance-sensitive parts of\nthe computation and rewriting them in a faster, but less convenient,\nlanguage such as C or Cython. With PyPy, there is no need to choose\nbetween clear, Pythonic code and good performance. This talk aims to\nconvince the audience that PyPy should be part of every scientific\nprogrammer's toolbox.\n\nPyPy, the fast and compliant alternative implementation of Python, is\nnow compatible with the SciPy ecosystem. We'll explore how scientific\nprogrammers can use it.\n", "duration": 1666, "language": "eng", - "published_at": "2020-03-06T16:55:59.000Z", "recorded": "2019-09-05", "speakers": [ "Ronan Lamy" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-qutip-a-quantum-toolbox-in-python-alex-and-nathan.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-qutip-a-quantum-toolbox-in-python-alex-and-nathan.json index 1cfa64cd5..2bc16560c 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-qutip-a-quantum-toolbox-in-python-alex-and-nathan.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-qutip-a-quantum-toolbox-in-python-alex-and-nathan.json @@ -2,7 +2,6 @@ "description": "QuTiP is emerging as a library at the center of a lively ecosystem. In\nthis talk you will learn about the ongoing projects that have invested\nin this project, from providing the framework to simulate quantum machine\nlearning for quantum computers to the development of efficient numerical\nsolvers tackling dynamical problems that are inherently hard to simulate\nclassically.\n\nIt can be noted that\nAstropy is a\ncommunity effort to develop a common core package for Astronomy in\nPython and \"foster an ecosystem of interoperable astronomy packages\".\n\nIt seems an interesting model for the quantum tech landscape.\nQiskit built its own ecosystem of sub-libraries for quantum\ncomputing.
The physics library for quantum tech is http://qutip.org.\n\nAbout the idea of QuTiP as a super-library, here are some details:\n\n- | krotov, a very recent package for optimal control built on top\n of QuTiP (https://arxiv.org/abs/1902.11284).\n | [https://github.com/qucontrol/krotov].\n\n- piqs, the permutational invariant quantum solver, now a QuTiP\n module (see also https://arxiv.org/abs/1805.05129);\n\n- matsubara, a plugin to study the ultrastrong coupling regime with\n structured baths, http://matsubara.readthedocs.io/\n\n- QNET, a computer algebra package for quantum mechanics and\n photonic quantum networks, which actually calls QuTiP as a plugin,\n mainly developed at Stanford in Mabuchi Lab\n https://github.com/mabuchilab/QNET\n\n- qptomographer,\n https://qptomographer.readthedocs.io/en/latest/install, a library to\n derive error bars for experiments in quantum computing and quantum\n information processing.\n\n- tiqs, a library to study open quantum systems on extended\n lattices exploiting the symmetries of such systems,\n https://github.com/fminga/tiqs\n\n- other upcoming integrations related to pulse control, such as\n qupulse,\n https://github.com/qutech/qupulse/wiki/Architecture-Proposal\n\nThis talk will be of interest to the curious coder and researcher,\nanalyzing how QuTiP's impact in the research community has fostered a\n*lingua franca* for quantum tech\nresearch. We\nwill also draw comparisons with other larger ecosystems in Python-based\nscientific projects, such as astropy and scikit-learn.\n\nMore about QuTiP\n================\n\n- QuTiP is the open-source software to study quantum physics. It\n develops both an intuitive playground to understand quantum mechanics\n and cutting-edge tools to investigate it.\n- QuTiP provides the most comprehensive toolbox to characterize noise\n and dissipation \u2013realistic processes\u2013 affecting quantum systems, as\n well as tools not only to monitor but also to minimize their impact\n (quantum optimal control, description of decoherence-free spaces).\n- For this reason, QuTiP is a software born out of the quantum optics\n community that has become increasingly relevant for the quantum\n computing community, as current quantum computing devices are noisy\n (NISQ definition by Preskill).\n- pypinfo data shows that QuTiP is popular in countries that are\n strong in quantum tech and quantum computing research, e.g., the\n Netherlands in the top five, as well as countries that benefit from the\n use of open source software (OSS) for university coursework, e.g.,\n India.\n- In the past three years, there has been an evolution in the quantum\n tech community, which has embraced OSS.\n- OSS libraries are used as a means to grow the user base, as well as\n in a more structural way for quantum computers, as they provide cloud\n access to quantum devices, e.g., IBM Q.\n- QuTiP is the only major library that has continued to thrive in this\n ecosystem, competing with other library packages that are funded by\n corporations or VC-backed startups.\n- Since the tools of QuTiP provide a common ground to study quantum\n mechanics, it is important that this independent project is provided\n with the necessary support to thrive.\n- As access to quantum computers becomes more and more widespread also\n for data scientists, and QuTiP's popularity grows even more\n in undergraduate and graduate courses, becoming the de facto\n standard OSS to study quantum optical systems, it is imperative that\n the QuTiP library makes a quality
jump to provide a comprehensive\n introduction to its tools for a much broader community of users.\n\n- QuTiP website: http://www.qutip.org/\n\n- GitHub repository: https://github.com/qutip\n- GitHub repository (QuTiP code): https://github.com/qutip/qutip\n- GitHub repository (QuTiP documentation):\n https://github.com/qutip/qutip-doc\n- GitHub repository (QuTiP tutorials):\n https://github.com/qutip/qutip-notebooks\n- | Latest version of the documentation:\n | http://qutip.org/docs/latest/index.html\n\n- Historical archive of released documentation:\n http://qutip.org/documentation.html\n\nQuTiP core development team\n---------------------------\n\nQuTiP core development team: Alex Pitchford (alex.pitchford@gmail.com).\nAdditional mentors will be the project's core contributors Nathan\nShammah (nathan.shammah@gmail.com), Shahnawaz Ahmed\n(shahnawaz.ahmed95@gmail.com) and Eric Giguere\n(eric.giguere@usherbrooke.ca).\n\nQuTiP is a project started by Robert J. Johansson and Paul Nation. Other\ncore developers have been Arne Grimsmo, Chris Granade and over 44 other\ncontributors.\n\nReferences\n----------\n\n[1] J. R. Johansson, P. D. Nation, and F. Nori: \u201cQuTiP: An open-source\nPython framework for the dynamics of open quantum systems\u201d, Comp. Phys.\nComm. 183, 1760\u20131772 (2012)\n\n[2] J. Robert Johansson, Paul D. Nation, and Franco Nori: \u201cQuTiP 2: A\nPython framework for the dynamics of open quantum systems\u201d, Comp. Phys.\nComm. 184, 1234 (2013)\n\n[3] J. Preskill, \"Quantum Computing in the NISQ era and beyond\", Quantum\n**2**, 79 (2018)\n\n[4] Mark Fingerhuth, Tom\u00e1\u0161 Babej, and Peter Wittek, Open source software\nin quantum computing, PLoS ONE 13 (12): e0208561 (2018).\n\n[5] N. Shammah, S. Ahmed, N. Lambert, S. De Liberato, and F. Nori, \"Open\nquantum systems with local and collective incoherent processes:\nEfficient numerical simulation using permutational invariance\", Phys.\nRev. A 98, 063815 (2018). Code at http://piqs.readthedocs.io\n\n[6] N. Lambert, S. Ahmed, M. Cirio, and F. Nori, \"Virtual excitations in\nthe ultra-strongly-coupled spin-boson model: physical results from\nunphysical modes\", arXiv preprint arXiv:1903.05892. Also\nhttp://matsubara.readthedocs.io\n\n**Other relevant material**:\n\n- Slides on QuTiP and the quantum-tech open source ecosystem (Nathan\n Shammah @ Berkeley Lab, 2019).\n PDF \n\n- \"The rise of open source in quantum physics\n research\",\n Nathan Shammah and Shahnawaz Ahmed, Nature's physics blog, January 9,\n 2019.\n\n- \"Bit to QuBit: Data in the age of quantum computers\", Shahnawaz\n Ahmed, PyData 2018, Warsaw, Poland, 2019. YouTube\n video .\n\nIn this talk you will learn how QuTiP, the quantum toolbox in Python\n(http://qutip.org), has grown from a library into an *ecosystem*. QuTiP\nis used for education, to teach quantum physics.
In research and\nindustry, it is used for quantum computing simulation.\n", "duration": 1926, "language": "eng", - "published_at": "2019-12-01T09:15:48.000Z", "recorded": "2019-09-04", "speakers": [ "Nathan Shammah", diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-ramp-guillaume-and-joris.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-ramp-guillaume-and-joris.json index 1c269fe27..50ce786b9 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-ramp-guillaume-and-joris.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-ramp-guillaume-and-joris.json @@ -2,7 +2,6 @@ "description": "We will give an overview of the RAMP framework, which provides a\nplatform to organize reproducible and transparent data challenges.\n\nRAMP workflow is a Python package used to define and formalize the data\nscience problem to be solved. It can be used as a standalone package and\nallows a user to prototype different solutions. In addition to RAMP\nworkflow, a set of packages has been developed to allow sharing and\ncollaborating around the developed solutions. RAMP database\nprovides a database structure to store the solutions of different users\nand the performance of these solutions. RAMP engine is the package to\nrun the user solutions (possibly on the cloud) and populate the\ndatabase. Finally, RAMP frontend is the web frontend where users can\nupload their solutions and which shows the leaderboard of the challenge.\n\nThe project is open-source and can be deployed on any local server. The\nframework has been used at the Paris-Saclay Center for Data Science for\nsetting up and solving about twenty scientific problems, for organizing\ncollaborative data challenges, for organizing scientific sub-communities\naround these events, and for training novice data scientists.\n\nThe RAMP (Rapid Analytics and Model Prototyping) framework provides a\nplatform to organize reproducible and transparent data challenges. We\nwill present the different framework bricks.\n", "duration": 943, "language": "eng", - "published_at": "2019-12-01T14:08:28.000Z", "recorded": "2019-09-04", "speakers": [ "Guillaume Lemaitre", diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-recent-advances-in-python-parallel-computing-pierre-glaser.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-recent-advances-in-python-parallel-computing-pierre-glaser.json index 909874e58..4284d7b34 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-recent-advances-in-python-parallel-computing-pierre-glaser.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-recent-advances-in-python-parallel-computing-pierre-glaser.json @@ -2,7 +2,6 @@ "description": "Parallel computing in Python: Current state and recent advances\n===============================================================\n\n| *Modern hardware is multi-core*. It is crucial for Python to provide\n| high-performance parallelism. This talk will expose to both\n data scientists and\n| library developers the current state of affairs and the recent\n advances for\n| parallel computing with Python. The goal is to help practitioners and\n| developers to make better decisions on this matter.\n\n| I will first cover how Python can interface with parallelism, from\n leveraging\n| external parallelism of C-extensions \u2013especially the BLAS family\u2013 to\n Python's\n| multiprocessing and multithreading API. I will touch upon use cases,\n e.g. single\n| vs multi-machine, as well as the pros and cons of the various\n solutions for\n| each use case.
Most of these considerations will be backed by\n benchmarks from\n| the `scikit-learn `__ machine\n| learning library.\n\n| From these low-level interfaces emerged higher-level parallel\n processing\n| libraries, such as concurrent.futures,\n| `joblib `__ and\n| `loky `__ (used by\n| `dask `__ and `scikit-learn `__).\n These\n| libraries make it easy for Python programmers to use safe and reliable\n| parallelism in their code. They can even work in more exotic\n situations, such\n| as interactive sessions, in which Python\u2019s native multiprocessing\n support tends\n| to fail. I will describe their purpose as well as the canonical\n use-cases they\n| address.\n\n| The last part of this talk will focus on the most recent advances in\n the Python\n| standard library, addressing one of the principal performance\n bottlenecks of\n| multi-core/multi-machine processing, which is data communication. We\n will\n| present a `new\n API `__\n| for shared-memory management between different Python processes, and\n| performance improvements for the serialization of large Python objects\n (`PEP\n 574 `__, `pickle\n extensions `__). These\n performance\n| improvements will be leveraged by distributed data science frameworks\n such as\n| dask, `ray `__ and\n| `pyspark `__.
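A minimal sketch of that standard-library shared-memory API (available since Python 3.8)::

    import numpy as np
    from multiprocessing import shared_memory

    a = np.arange(6, dtype=np.int64)
    shm = shared_memory.SharedMemory(create=True, size=a.nbytes)
    b = np.ndarray(a.shape, dtype=a.dtype, buffer=shm.buf)
    b[:] = a      # data now lives in shared memory; another process can attach
                  # with shared_memory.SharedMemory(name=shm.name), zero-copy
    shm.close()
    shm.unlink()  # release the segment once all processes are done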
\n\n| *Modern hardware is multi-core*. It is crucial for Python to provide\n| efficient parallelism. This talk exposes the current state and\n advances\n| in Python parallelism, in order to help practitioners and developers\n take\n| better decisions on this matter.\n", "duration": 1802, "language": "eng", - "published_at": "2020-03-06T16:20:12.000Z", "recorded": "2019-09-05", "speakers": [ "Pierre Glaser" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-scientific-devops-nicholas-del-grosso.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-scientific-devops-nicholas-del-grosso.json index a938a7430..a2e0b09e3 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-scientific-devops-nicholas-del-grosso.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-scientific-devops-nicholas-del-grosso.json @@ -2,7 +2,6 @@ "description": "Open source and open science come together when the software is\naccessible, transparent, and owned by all. For data analysis pipelines\nthat grow in complexity beyond a single Jupyter notebook, this can\nbecome a challenge as the number of steps and software dependencies\nincrease. In this talk, Nicholas Del Grosso will review a variety of\ntools for packaging and managing a data analysis pipeline, showing how\nthey fit together and benefit the development, testing, deployment, and\npublication processes and the scientific community. In particular, this\ntalk will cover:\n\n- **Workflow managers** (e.g. Snakemake, PyDoit, Luigi) to combine\n complex pipelines into single applications.\n\n- **Container Solutions** (e.g. Docker and Singularity) to package and\n deploy the software on others' computers, including high-performance\n computing clusters.\n\n- **The Scientific Filesystem** to build explorable and multi-purpose\n applications.\n\n- **Testing Frameworks** (e.g. PyTest, Hypothesis) to declare and\n confirm the assumptions and functionality of the analysis pipeline.\n\n- **Ease-of-Use Utilities** to share the pipeline online and make it\n accessible to non-programmers.\n\nBy writing software that stays manageable, reproducible, and deployable\ncontinuously throughout the development cycle, we can better fulfill the\ngoals of open science and good scientific practice in a digital era.\n\nA review of DevOps tools as applied to data analysis pipelines,\nincluding workflow managers, software containers, testing frameworks,\nand online repositories for performing reproducible science that scales.\n", "duration": 1832, "language": "eng", - "published_at": "2019-12-02T12:06:20.000Z", "recorded": "2019-09-04", "speakers": [ "Nicholas Del Grosso" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-telapy-compute-free-surface-flows-and-sediments-transport-yoann-audouin.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-telapy-compute-free-surface-flows-and-sediments-transport-yoann-audouin.json index f01add98c..22e225aa0 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-telapy-compute-free-surface-flows-and-sediments-transport-yoann-audouin.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-telapy-compute-free-surface-flows-and-sediments-transport-yoann-audouin.json @@ -1,7 +1,6 @@ { "description": "This talk is focused on the application of the TelApy module\n(`www.opentelemac.org `__). TelApy aims to\nprovide a Python wrapper of the TELEMAC-MASCARET API (Application Program\nInterface). The goal of TelApy is to have full control over the\nsimulation while running a case. For example, it must allow the user to\nstop the simulation at any time step, get values of some variables and\nchange them. In order to make this possible, a Fortran structure called\ninstantiation was developed with the API. It contains a list of strings\npointing to TELEMAC variables. This gives direct access to the physical\nmemory of variables, and therefore allows getting and setting their values.\nFurthermore, changes have been made in TELEMAC-MASCARET main subroutines\nto make it possible to execute hydraulic cases time step by time step. It is\nuseful to drive the TELEMAC-MASCARET SYSTEM APIs using the Python\nprogramming language. In fact, Python is a portable, dynamic,\nextensible, free language, which allows (without imposing) a modular\napproach and object-oriented programming. In addition to the benefits of\nthis programming language, Python offers a large number of\ninteroperable libraries. Linking various interoperable\nlibraries with the TELEMAC-MASCARET SYSTEM APIs allows the creation of an\never more efficient computing chain, able to respond more finely to\nvarious complex problems. Therefore, the TelApy module has the ambition\nto enable a new way of using the TELEMAC-MASCARET system. In\nparticular, one can think of high performance computing for the\ncalculation of uncertainties, optimization, code coupling and so on.
The\nobjective of this talk is to present some examples of the TelApy module\nin the cases of Uncertainty Quantification, Optimization, and Reduced Order\nModeling.\n\nTelApy is a Python module to compute free surface flows and sediment\ntransport in geosciences; we show examples of how it is used to inter-operate\nwith other Python libraries for Uncertainty Quantification,\nOptimization, and Reduced Order Modeling.\n", "duration": 939, - "published_at": "2020-03-06T15:07:44.000Z", "recorded": "2019-09-05", "speakers": [ "Yoann Audouin" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-test-in-ml-projects-sarah-diot-girard.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-test-in-ml-projects-sarah-diot-girard.json index 2517376d8..6d4c5f76d 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-test-in-ml-projects-sarah-diot-girard.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-test-in-ml-projects-sarah-diot-girard.json @@ -1,7 +1,6 @@ { "description": "Once your machine learning POC seems promising and your development\nenvironment is set up, the next step is to refactor your code and write\nTESTS. We know that a lot of people think tests are too complicated and\nboring to write, that they are not very useful, and that some manual checks can\naddress the need.\n\n| That is not totally false. Tests can be really boring and time-consuming\n to write when you don't have the right tools, the right APIs, the\n right environments or the right code structure.\n| But it is always a bad idea to ignore tests or to perform them\n manually. If you want to be involved in your project life cycle, if\n you want to bring it from POC to production you need to care about\n tests. After some years tackling production bugs, you can't feel safe\n delivering without tests as you can't start driving until your seat\n belt is fastened.\n\nThere is more than one way to test. Tests can be split across several levels\n(unit, component, functional, performance, etc.) to be able to\nquickly identify the faulty code/data/parameter. Tests must also be\nautomated in a Continuous Integration pipeline and run at least on each\nexperiment before merging it into the baseline pipeline as it is done in\nsoftware engineering (the CI is triggered on each feature branch).\n\nThis talk is about how to easily write tests and testable code, how to\navoid the most common traps and what are the benefits of tests on\nunrealistic data in your Machine Learning project.\n\n(Tests on real data are also really important but they are not the main\npurpose of this talk.)\n\nSlides are here:\n`sdg.jlbl.net/slides/tests\\_for\\_datascientist/presentation.html `__\n\nGood practice tells you that you must write tests! But testing Machine Learning\nprojects can be really complicated. Test writing often seems\ninefficient. Which kind of test should be written?
How to write them?\nWhat are the benefits?\n", "duration": 1843, - "published_at": "2019-12-02T12:00:10.000Z", "recorded": "2019-09-04", "speakers": [ "Sarah Diot-Girard" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-test-with-hypothesis-zac-hatfield-dodds.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-test-with-hypothesis-zac-hatfield-dodds.json index 2265b0896..a5e64023b 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-test-with-hypothesis-zac-hatfield-dodds.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-test-with-hypothesis-zac-hatfield-dodds.json @@ -1,7 +1,6 @@ { "description": "| Hypothesis is a testing package that will search for counterexamples\n to your\n| assertions \u2013 so you can write tests that provide a high-level\n description of your\n| code or system, and let the computer attempt a Popperian\n falsification. If it\n| fails, your code is (probably) OK\u2026 and if it succeeds you have a\n minimal input\n| to debug.
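A minimal property-based test in that spirit (an illustrative sketch, not taken from the talk)::

    from hypothesis import given, strategies as st

    @given(st.lists(st.floats(allow_nan=False, allow_infinity=False)))
    def test_sorting_is_idempotent(xs):
        once = sorted(xs)
        # A property that must hold for *any* input list; Hypothesis searches
        # for a counterexample and reports a minimal failing input if found.
        assert sorted(once) == once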
\n\n| Come along and learn the principles of property-based testing, how to\n use\n| Hypothesis, and how to use it to check scientific code \u2013 whether\n highly-polished\n| or quick-and-dirty!\n\n| You can even use it to test 'black boxes', such as simulations, where\n we have no\n| way of independently verifying that some input leads to the right\n output!\n| Intrigued? Come and learn about the power of embedding assertions in\n your\n| code, and metamorphic relations in your tests!\n\nTesting research code can be difficult, but is essential for robust\nresults. Using Hypothesis, a tool for property-based testing, I'll show\nhow testing can be both easier and dramatically more powerful - even for\ncomplex \"black box\" codes.\n", "duration": 1807, - "published_at": "2019-12-02T13:27:24.000Z", "recorded": "2019-09-03", "speakers": [ "Zac Hatfield-Dodds" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-the-magic-of-neural-embeddings-with-tensorflow-2-oliver-zeigermann.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-the-magic-of-neural-embeddings-with-tensorflow-2-oliver-zeigermann.json index d36375ad4..64faa973c 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-the-magic-of-neural-embeddings-with-tensorflow-2-oliver-zeigermann.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-the-magic-of-neural-embeddings-with-tensorflow-2-oliver-zeigermann.json @@ -2,7 +2,6 @@ "description": "Symbols, words, categories etc. need to be converted into numbers before\nthey can be processed by neural networks or used in other ML methods\nlike clustering or outlier detection.\n\nIt is desirable to have the converted numbers represent semantics of the\nencoded categories. That means that numbers close to each other indicate\nsimilar semantics.\n\nIn this session you will learn what you need to train a neural network\nfor such embeddings. I will bring a complete example including code that\nI will share, using the TensorFlow 2 functional API and the Colab service.\n\nI will also share some tricks on how to stabilize embeddings when either\nthe model changes or you get more training data.\n\nNeural Embeddings are a powerful tool for turning categorical values into\nnumerical ones. Given reasonable training data, semantics present in\nthe categories can be preserved in the numerical representation.\n", "duration": 1501, "language": "eng", - "published_at": "2020-03-06T12:30:16.000Z", "recorded": "2019-09-05", "speakers": [ "Oliver Zeigermann" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-the-qgis-user-interface-a-deep-dive-sebastian-m-ernst.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-the-qgis-user-interface-a-deep-dive-sebastian-m-ernst.json index fe4c41d1d..84c34df28 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-the-qgis-user-interface-a-deep-dive-sebastian-m-ernst.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-the-qgis-user-interface-a-deep-dive-sebastian-m-ernst.json @@ -2,7 +2,6 @@ "description": "Having been around for two decades, QGIS clearly is an organically grown\nproject. It has primarily been fulfilling the various special needs of\nits developers. From an outsider's perspective, it is an amazingly rich\npatchwork of features. However, some are deeply hidden in numerous\nlayers of user interface elements, requiring intense training to\nget used to. Others are only accessible through APIs, requiring not\nonly training but also programming skills.\n\nBeing confronted with QGIS as professional users on a regular basis, we\nthought about what would make working with QGIS more attractive. What if\nQGIS had a pleasant, coherent theme, including not only colors but also\nicons? What if QGIS had the ability to store workbench configurations?\nWhat if QGIS had dedicated interface configurations for specific\nworkflows? What if much more of the API's functionality was accessible\nthrough the GUI in a well-organized way? How could QGIS work in a useful\nmanner with ribbons? How could the incredible number of dialogs be tamed\ninto tabs?\n\nWe demonstrate (live) a series of user interface experiments \u2013 all of\nwhich are or will be `available online `__ as\nPython plugins.\n\nIn this context, the current state of play with respect to Python and\nQGIS is explained in detail. The way QGIS is typically being distributed\nputs quite a few unusual limitations on Python plugin code. The case is\nmade that some of those limitations are simply out of date and must be\novercome, which may require help from the broader (scientific) Python\ncommunity.\n\nWe seek a conversation with the audience.\n\nHow can one of the largest code bases in open source Geographical\nInformation Science \u2013 QGIS \u2013 be enhanced and re-designed? Through the\npowers of Python plugins. This talk demonstrates concepts on how to make\nQGIS more user-friendly.\n", "duration": 813, "language": "eng", - "published_at": "2020-03-06T12:30:17.000Z", "recorded": "2019-09-05", "speakers": [ "Sebastian M. Ernst" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-tofu-didier-and-laura.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-tofu-didier-and-laura.json index bf418e822..44540a888 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-tofu-didier-and-laura.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-tofu-didier-and-laura.json @@ -2,7 +2,6 @@ "description": "| Nuclear fusion comes along with great promises of almost limitless\n energy with few risks and little waste. But it also comes with significant\n scientific and technological complexities. Decades of effort may find\n an echo in ITER, an international tokamak being built to address this\n challenge. A tokamak is a particular kind of advanced experimental\n nuclear fusion reactor.
It is a torus-shaped vacuum vessel in which a\n hydrogen plasma of very low density is heated up to temperatures\n (10-100 million degrees Celsius) allowing nuclear fusion reactions\n to occur. The torus-shaped plasma radiates light, which is measured in\n various wavelength domains by dedicated sets of detectors (called\n diagnostics), like 2D cameras observing visible light, 1D arrangements\n of diodes sensitive to X-rays, ultra-violet spectrometers... Due to\n the torus shape, the plasma is axisymmetric, and like in medical\n imaging, tomography methods can be used to diagnose the light radiated\n in a plasma cross-section.\n| For all diagnostics, one can seek to solve the direct or the inverse\n problem. The direct problem consists in computing the measurements\n from a known plasma light emissivity, provided by a plasma simulation\n for example.\n| The inverse problem consists in computing the plasma light emissivity\n from experimental measurements. The algorithms involved in solving\n both the direct and inverse problem are very similar, no matter the\n wavelength domain.\n\nLike many, the fusion community tends to suffer from a lack of\nreproducibility of the results it publishes. This problem is\nparticularly acute in the case of tomography diagnostics since the\ninverse problem is ill-posed and the uniqueness of the solution is not guaranteed.\nThere are also many possible simplifying hypotheses that may, or may\nnot, be relevant for each diagnostic. In this regard, the historical\npractice of the community displays a large variety of single-user black-box\ncodes, each typically designed by a student, and often forgotten or left\nas is until a new student is hired and starts all over again.\n\n| In this context, a machine-independent, open-source and documented\n Python library, ToFu, was started to provide the fusion community with\n a common and free reference tool.\n| We thus aim at improving reproducibility by providing a known and\n transparent tool, able to efficiently solve both the direct and\n inverse problem for tomography diagnostics. It can use very simple\n hypotheses or very complete diagnostics descriptions alike, one of the\n ideas being that it should allow users to perform accurate\n calculations easily, sparing them the need to simplify hypotheses that\n are not always valid.\n\n| A zero version of tofu, fully operational but not user-friendly\n enough, was first developed between 2014 and 2016 when it was used for\n published results. On the strength of this first proof of principle, a\n significant effort was initiated in 2017 to completely re-write the\n code with a stronger emphasis on Python community standards (PEP8),\n version control (GitHub), performance (Cython), packaging (pip and\n conda), continuous integration (nosetests and travis), modularity\n (architecture refurbishing), user-friendliness (renamings, utility\n tools) and object-oriented coding (class inheritance).\n| This effort is still ongoing to this day and is scheduled to go on for\n the next 2.5 years. However, the first milestones have been reached,\n and we would like to present the first re-written modules to the\n Python community, for publicity, advice, feedback, mutually enriching\n exchanges and more generally because we feel tofu is part of the large\n open-source Python scientific community.\n\nThe code is composed of several modules: a geometry module, a data\nvisualization module, a meshing module, and an inversion module.
We will\npresent the geometry module (containing ray-tracing tools, spatial\nintegration algorithms...) and the data module (making use of matplotlib\nfor pre-defined interactive figures). Using profiling tools, the\nnumerical core of the geometry module was optimized and parallelized\nrecently in ``Cython``, making the code more than ten thousand times\nfaster than the previous version on some test cases. Memory usage has\nalso been reduced by half on the largest test cases.\n\nsee `ToFu `__\n\nWe present an open-source parallelized and cythonized Python library,\nToFu, for modeling tomography diagnostics on nuclear fusion reactors.\nIts functionalities (with realistic examples), its architecture and its\ndesign will be shown.\n", "duration": 1699, "language": "eng", - "published_at": "2019-12-01T10:00:26.000Z", "recorded": "2019-09-04", "speakers": [ "Didier VEZINET", diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-understanding-numba-valentin-haenel.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-understanding-numba-valentin-haenel.json index 6393d40b5..28a21251b 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-understanding-numba-valentin-haenel.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-understanding-numba-valentin-haenel.json @@ -2,7 +2,6 @@ "description": "| In this talk I will take you on a whirlwind tour of Numba, the\n just-in-time,\n| type-specializing function compiler for accelerating\n numerically-focused\n| Python. Numba can compile the computationally intensive functions of\n your\n| numerical programs and libraries from Python/NumPy to highly optimized\n binary\n| code. It does this by inferring the data types used inside these\n functions and\n| uses that information to generate code that is specific to those data\n types\n| and specialised for your target hardware. On top of that, it does all\n of this\n| on-the-fly---or just-in-time---as your program runs. This\n significantly reduces\n| the potential complexity that traditionally comes with pre-compiling\n and\n| shipping numerical code for a variety of operating systems, Python\n versions and\n| hardware architectures. All you need, in principle, is to\n ``conda install numba``\n| and decorate your compute-intensive functions with ``@numba.jit``!
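A minimal sketch of such a decorated function (illustrative function and data)::

    import numpy as np
    from numba import jit

    @jit(nopython=True)        # compiled on first call, specialised to the
    def mean_abs_diff(x, y):   # argument types that are actually passed in
        acc = 0.0
        for i in range(x.shape[0]):
            acc += abs(x[i] - y[i])
        return acc / x.shape[0]

    x = np.random.rand(10_000)
    y = np.random.rand(10_000)
    mean_abs_diff(x, y)        # the first call triggers JIT compilation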
\n\n| This talk will equip you with a mental model of how Numba is\n implemented and\n| how it works at the algorithmic level. You will gain a deeper\n understanding of\n| the types of use-cases where Numba excels and why. Also, you will\n understand\n| the limitations and caveats that exist within Numba, including any\n potential\n| ideas and strategies that might alleviate these. At the end of the\n talk you\n| will be in a good position to decide if Numba is for you and you will\n have\n| learnt about the concrete steps you need to take to include it as a\n dependency\n| in your program or library.\n\nIn this talk I will take you on a whirlwind tour of Numba and you will\nbe equipped with a mental model of how Numba works and what it is good\nat. At the end, you will be able to decide if Numba could be useful for\nyou.\n", "duration": 1775, "language": "eng", - "published_at": "2020-03-06T16:46:59.000Z", "recorded": "2019-09-05", "speakers": [ "Valentin Haenel" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-veloxchem-python-meets-quantum-chemistry-and-hpc-olav-vahtras.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-veloxchem-python-meets-quantum-chemistry-and-hpc-olav-vahtras.json index 7abf55f83..ceb0c9882 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-veloxchem-python-meets-quantum-chemistry-and-hpc-olav-vahtras.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-veloxchem-python-meets-quantum-chemistry-and-hpc-olav-vahtras.json @@ -2,7 +2,6 @@ "description": "| Zilvinas Rinkevicius, Xin Li, Olav Vahtras, Manuel Brand, Karan\n Ahmadzadeh, Magnus\n| Ringholm, Nanna List, and Patrick Norman\n\n| With the ease of Python library modules, VeloxChem offers a front end\n to quantum chemical\n| calculations on contemporary high-performance computing (HPC) systems\n and aims at\n| harnessing the future compute power within the EuroHPC initiative. At\n the heart of this\n| software lies a module for the evaluation of electron-repulsion\n integrals (ERIs) using the Obara-Saika recurrence scheme, where a high\n degree of efficiency is achieved by employing\n| architecture-independent vectorization via OpenMP SIMD pragmas in the\n auto-generated C++\n| source code. The software is topology-aware and, with a\n Python-controlled work and task flow,\n| the idle time is minimized using an MPI/OpenMP partitioning of\n resources.\n| In the second software layer, we have implemented a highly accurate\n SCF start guess based\n| on atomic densities and a first level of iterations in a reduced\n version of the user-defined basis\n| set, leading to a very smooth convergence in the subsequent standard\n DIIS scheme. This layer\n| also includes vectorized and OpenMP/MPI parallelized modules for\n efficient generation of DFT\n| grid points and weights as well as kernel integration.\n| In the third software layer, we present real and complex response\n functions so as to address\n| dispersive and absorptive molecular properties in spectroscopy. The\n kernel module in this layer\n| is the iterative linear response equation solver that we have\n formulated and implemented for a\n| combination of multiple optical frequencies and multiple perturbation\n operators. With efficient\n| use of computer memory, we enable the simultaneous reference to, and\n solving of, on the order\n| of 1,000 response equations for sizable biochemical systems without\n spatial symmetry, and we\n| can thereby determine electronic response spectra in arbitrary\n wavelength regions, including\n| UV/vis and X-ray, without resolving the sometimes embedded excited\n states in the spectrum.\n| E.g.
the electronic CD spectrum (involving the Cartesian sets of\n electric and magnetic\n| perturbations) over a range of some 10 eV is obtained at a\n computational cost comparable to\n| that of determining the transition energy of the lowest excited state,\n or optimizing the electronic\n| structure of the reference state.\n\n| A new and efficient Python/C++ modular library for real and complex\n response functions at the\n| level of Kohn-Sham density functional theory\n", "duration": 1276, "language": "eng", - "published_at": "2020-03-06T15:07:43.000Z", "recorded": "2019-09-05", "speakers": [ "Olav Vahtras" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-visual-diagnostics-at-scale-rebecca-bilbro.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-visual-diagnostics-at-scale-rebecca-bilbro.json index 7183e9465..537c16dca 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-visual-diagnostics-at-scale-rebecca-bilbro.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-visual-diagnostics-at-scale-rebecca-bilbro.json @@ -2,7 +2,6 @@ "description": "Even with a modestly-sized dataset, the hunt for the most effective\nmachine learning model is *hard*. Arriving at the optimal combination of\nfeatures, algorithm, and hyperparameters frequently requires significant\nexperimentation and iteration. This leads some of us to stay inside\nalgorithmic comfort zones, some to trail off on random walks, and others\nto resort to automated processes like gridsearch. But whatever path we\ntake, we are often left in doubt about whether our final solution really\nis the optimal one. And as our datasets grow in size and dimension, so\ntoo does this ambiguity.\n\nFortunately, many of us have developed strategies for steering model\nsearch. Open source libraries like\n`seaborn `__,\n`pandas `__ and\n`yellowbrick `__ can help make\nmachine learning more informed with visual diagnostic tools like\nhistograms, correlation matrices, parallel coordinates, manifold\nembeddings, validation and learning curves, residuals plots, and\nclassification heatmaps. These tools enable us to tune our models with\nvisceral cues that allow us to be more strategic in our choices.\nVisualizing feature transformations, algorithmic behavior,\ncross-validation methods, and model performance allows us a peek into\nthe multi-dimensional realm in which our models operate.\n\nHowever, large, high-dimensional datasets can prove particularly\ndifficult to explore. Not only do the majority of people struggle to\nvisualize anything beyond two- or three-dimensional space, many of our\nfavorite open source Python tools are not designed to be performant with\narbitrarily big data. So how well *do* our favorite visualization\ntechniques hold up to large, complex datasets?\n\nIn this talk, we'll consider a suite of visual diagnostics \u2014 some\nfamiliar and some new \u2014 and explore their strengths and weaknesses with\nseveral publicly available datasets of varying size. Which suffer most\nfrom the curse of dimensionality in face of increasingly big data? What\nare the workarounds (e.g. sampling, brushing, filtering, etc.) and when\nshould we use them? And most importantly, how can we continue to steer\nthe machine learning process \u2014 not only purposefully but at scale?\n\nMachine learning is a search for the best combination of features,\nmodel, and hyperparameters. 
But as data grow, so does the search space!\nFortunately, visual diagnostics can focus our search and allow us to\nsteer modeling purposefully, and at scale.\n", "duration": 1773, "language": "eng", - "published_at": "2020-03-06T15:54:33.000Z", "recorded": "2019-09-05", "speakers": [ "Dr. Rebecca Bilbro" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-vtext-fast-text-processing-in-python-using-rust-roman-yurchak.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-vtext-fast-text-processing-in-python-using-rust-roman-yurchak.json index 7d9ed0b46..571ab63cc 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-vtext-fast-text-processing-in-python-using-rust-roman-yurchak.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-vtext-fast-text-processing-in-python-using-rust-roman-yurchak.json @@ -2,7 +2,6 @@ "description": "| Scientific Python has historically relied on compiled extensions for\n performance-critical parts of the code. In this talk, we outline how\n to write Rust extensions for Python using the\n `rust-numpy `__\n| project. Advantages and limitations of this approach as compared to\n Cython or wrapping Fortran, C or C++ are also discussed.\n\nIn the second part, we introduce the\n`vtext `__ project that allows fast text\nprocessing in Python using Rust. In particular, we consider the problems\nof text tokenization and (parallel) token counting, resulting in a\nsparse vector representation of documents. These can then be used as\ninput in machine learning or information retrieval applications. We\noutline the approach used in vtext and compare it to existing solutions of\nthese problems in the Python ecosystem.\n\nIn this talk, we present some of the benefits of writing extensions for\nPython in Rust. We then illustrate this approach on the\n`vtext `__ project, which aims to be a\nhigh-performance library for text processing.\n", "duration": 965, "language": "eng", - "published_at": "2020-03-06T15:07:44.000Z", "recorded": "2019-09-05", "speakers": [ "Roman Yurchak" diff --git a/euroscipy-2019/videos/euroscipy-2019-bilbao-welcome-talk-alexandre-savio.json b/euroscipy-2019/videos/euroscipy-2019-bilbao-welcome-talk-alexandre-savio.json index bb20c90f8..85da2f9b0 100644 --- a/euroscipy-2019/videos/euroscipy-2019-bilbao-welcome-talk-alexandre-savio.json +++ b/euroscipy-2019/videos/euroscipy-2019-bilbao-welcome-talk-alexandre-savio.json @@ -2,7 +2,6 @@ "description": "EuroSciPy 2019 Bilbao \nSeptember 4, Wednesday \nMain Track\n\nWelcome talk \nAlexandre Savio\n\nIntroductory talk to EuroSciPy 2019 Bilbao.", "duration": 612, "language": "eng", - "published_at": "2019-10-10T07:56:01.000Z", "recorded": "2019-09-04", "speakers": [ "Alexandre Savio" diff --git a/pybay-2023/videos/15311.json b/pybay-2023/videos/15311.json index 5d7375d70..ada69854b 100644 --- a/pybay-2023/videos/15311.json +++ b/pybay-2023/videos/15311.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T12:15:00", "slug": "Programming_Your_Computer_With_Python", - "source_url": "https://youtu.be/LceLUPdIzRs", "speakers": [ "Glyph" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-60c419c7-8de4-4c50-a99a-47403ed7cd54" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15312.json b/pybay-2023/videos/15312.json index 8bdf45bd6..286854e5b 100644 --- a/pybay-2023/videos/15312.json +++ b/pybay-2023/videos/15312.json @@ -25,6 +25,5 @@ "label": "conf", "url":
"https://pybay.com/speakers/#sz-speaker-06c2b133-a57d-492e-b98b-385f2e54f2a9" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15313.json b/pybay-2023/videos/15313.json index 391886f1c..0a80ad3ea 100644 --- a/pybay-2023/videos/15313.json +++ b/pybay-2023/videos/15313.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T13:15:00", "slug": "Data_Science_beasts_failures_and_where_to_find_them", - "source_url": "https://youtu.be/pHlptwP20MY", "speakers": [ "Grishma Jena" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-56e08f39-fbea-4cf8-9af0-aba66372abc6" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15314.json b/pybay-2023/videos/15314.json index 67ab60110..4f519b0fc 100644 --- a/pybay-2023/videos/15314.json +++ b/pybay-2023/videos/15314.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T13:15:00", "slug": "Ranking_and_Retrieval_Techniques_for_Retrieval_Augmented_Generation_with_Haystack", - "source_url": "https://youtu.be/6u7osMnIQHg", "speakers": [ "Tuana Celik" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-bf72d9fa-7408-4c51-8438-562227a0d619" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15315.json b/pybay-2023/videos/15315.json index 1698c97ce..bd70500bc 100644 --- a/pybay-2023/videos/15315.json +++ b/pybay-2023/videos/15315.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T13:30:00", "slug": "Type_safe_data_validation_using_Pydantic_v2", - "source_url": "https://youtu.be/h9uCUVjKeas", "speakers": [ "Adrian Garcia Badaracco" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-04379c6c-6e4d-4004-93a2-4028147c9ba1" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15316.json b/pybay-2023/videos/15316.json index 316a4d0af..1dd676da5 100644 --- a/pybay-2023/videos/15316.json +++ b/pybay-2023/videos/15316.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T13:45:00", "slug": "Craft_Complex_Mock_Data", - "source_url": "https://youtu.be/N5Anbq8vYNk", "speakers": [ "Jason Koo" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-66726e07-256d-4ae1-b1f8-2b87c97c3546" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15317.json b/pybay-2023/videos/15317.json index 5d9550c6f..209396721 100644 --- a/pybay-2023/videos/15317.json +++ b/pybay-2023/videos/15317.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T14:00:00", "slug": "Embeddings_What_they_are_and_why_they_matter", - "source_url": "https://youtu.be/snKTqb10vWQ", "speakers": [ "Simon Willison" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-b154e0eb-4b03-4acd-9e90-4ba7ce0929c9" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15318.json b/pybay-2023/videos/15318.json index f2e24121f..f295d53d5 100644 --- a/pybay-2023/videos/15318.json +++ b/pybay-2023/videos/15318.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T14:30:00", "slug": "Lets_talk_about_JWT", - "source_url": "https://youtu.be/0vxVUjUL_Nw", "speakers": [ "Jessica Temporal" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-46a2337d-e054-48c7-9355-c143140e64c0" } - ], - "veyepar_state": 10 + ] } \ No 
newline at end of file diff --git a/pybay-2023/videos/15320.json b/pybay-2023/videos/15320.json index c0fb5a000..acd3522eb 100644 --- a/pybay-2023/videos/15320.json +++ b/pybay-2023/videos/15320.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T15:15:00", "slug": "FORKS_POOLS_ASYNC_Solving_Wordle_with_Pythons_concurrency_tools", - "source_url": "https://youtu.be/ViUEGvNDwrQ", "speakers": [ "Christopher Neugebauer" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-df477a5e-31da-4727-a04b-2d7a9c698715" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15321.json b/pybay-2023/videos/15321.json index a0bdfe39b..49bab0174 100644 --- a/pybay-2023/videos/15321.json +++ b/pybay-2023/videos/15321.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T15:15:00", "slug": "Infrastructure_as_a_Product_Lessons_in_Platform_Engineering", - "source_url": "https://youtu.be/5hbxUX4dwyk", "speakers": [ "Nick DiRienzo" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-251cd32c-29ce-4562-acb3-ba0b40bef001" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15322.json b/pybay-2023/videos/15322.json index 1017afc03..68c08160d 100644 --- a/pybay-2023/videos/15322.json +++ b/pybay-2023/videos/15322.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T15:45:00", "slug": "Elevating_Python_Development_with_Nix_Package_Manager", - "source_url": "https://youtu.be/AJs_izrEBOA", "speakers": [ "Salar Rahmanian" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-906582fb-d40b-4c3a-9203-76e84148face" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15323.json b/pybay-2023/videos/15323.json index a8029fcd7..442e2c61c 100644 --- a/pybay-2023/videos/15323.json +++ b/pybay-2023/videos/15323.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T15:45:00", "slug": "Using_pandas_and_pyspark_to_address_challenges_in_processing_and_storing_time_series_instrument_data", - "source_url": "https://youtu.be/yCp6b_rHrLQ", "speakers": [ "Aaron Wiegel" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-61dd28d8-78b9-4c39-9b10-11f62d83ab10" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15325.json b/pybay-2023/videos/15325.json index 084e78304..d77f1835f 100644 --- a/pybay-2023/videos/15325.json +++ b/pybay-2023/videos/15325.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T16:15:00", "slug": "Python_in_Hardware_Embedded_Systems_A_Deep_Dive", - "source_url": "https://youtu.be/r6wabaVH7CE", "speakers": [ "Sriram Vamsi Ilapakurthy" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-2ec02e2b-6827-4823-aa00-8e77605ff96c" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15327.json b/pybay-2023/videos/15327.json index 3061869d0..6cf1254f4 100644 --- a/pybay-2023/videos/15327.json +++ b/pybay-2023/videos/15327.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T16:45:00", "slug": "Better_Together_Unleashing_the_Synergy_of_Pandas_Polars_and_Apache_Arrow", - "source_url": "https://youtu.be/IVgizc711OA", "speakers": [ "Chris Brousseau" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-438b7844-5b59-44f8-89f8-d8b3b8d270a0" } - ], - 
"veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15328.json b/pybay-2023/videos/15328.json index 343af773d..1ce794119 100644 --- a/pybay-2023/videos/15328.json +++ b/pybay-2023/videos/15328.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T17:00:00", "slug": "Python_Planets_and_Portals_Designing_Web_Apps_for_Modern_Astronomers", - "source_url": "https://youtu.be/oZMe1EgNvZg", "speakers": [ "Dan Burger" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-11880483-ccba-41a9-8c4d-648f258f3be5" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15329.json b/pybay-2023/videos/15329.json index af11f5735..49aa42c68 100644 --- a/pybay-2023/videos/15329.json +++ b/pybay-2023/videos/15329.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T17:00:00", "slug": "Scale_Data_Science_by_Pandas_API_on_Spark", - "source_url": "https://youtu.be/uQRmZbIyuzg", "speakers": [ "Xinrong Meng" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-584fc381-f0db-4da5-ae69-d2092d26bb3a" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15330.json b/pybay-2023/videos/15330.json index d27231b9a..5f5cf6867 100644 --- a/pybay-2023/videos/15330.json +++ b/pybay-2023/videos/15330.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T18:00:00", "slug": "Testing_Strategies_for_Python", - "source_url": "https://youtu.be/HHR2YnWD0rw", "speakers": [ "Liz Acosta" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-a4b35ed6-dca5-4457-be11-9c924afcae8e" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15331.json b/pybay-2023/videos/15331.json index 9331dd197..7b2262779 100644 --- a/pybay-2023/videos/15331.json +++ b/pybay-2023/videos/15331.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T16:45:00", "slug": "No_More_Nitpicks_effortless_cleanup_with_fixers_formatters_and_codemods", - "source_url": "https://youtu.be/vqlHqqhTwzA", "speakers": [ "Zac Hatfield-Dodds" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-41681d23-2485-4034-a93a-690d0e22ffc6" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15332.json b/pybay-2023/videos/15332.json index c42a42426..01a1e25ee 100644 --- a/pybay-2023/videos/15332.json +++ b/pybay-2023/videos/15332.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T18:54:27", "slug": "Unleashing_Pythons_Power_Serverless_Innovations_with_AWS_Lambda", - "source_url": "https://youtu.be/yaXfmdasT9E", "speakers": [ "Mayank Jindal" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-82ec4de4-8439-412b-b2fd-619f33b77451" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15333.json b/pybay-2023/videos/15333.json index 24a7bfe8f..4de387376 100644 --- a/pybay-2023/videos/15333.json +++ b/pybay-2023/videos/15333.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T14:15:00", "slug": "Python_deployment_with_Docker_and_Poetry", - "source_url": "https://youtu.be/hXYFS2pOEH8", "speakers": [ "Cristian Heredia" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-a8a33ab3-caac-43aa-a10a-6d23428c73c9" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git 
a/pybay-2023/videos/15334.json b/pybay-2023/videos/15334.json index 1779e070f..1ce4f4631 100644 --- a/pybay-2023/videos/15334.json +++ b/pybay-2023/videos/15334.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T18:30:00", "slug": "Shiny_Datacentric_web_applications_in_Python", - "source_url": "https://youtu.be/o8vxlj8Vcqo", "speakers": [ "Joe Cheng" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-7e5324de-3afa-4614-a498-562bd5eb9986" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15335.json b/pybay-2023/videos/15335.json index ae93b4562..6f5489368 100644 --- a/pybay-2023/videos/15335.json +++ b/pybay-2023/videos/15335.json @@ -25,6 +25,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-cefe110f-aeca-4c60-ad5d-65f4f0ecce8a" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15336.json b/pybay-2023/videos/15336.json index 0d8747a73..bef8955c9 100644 --- a/pybay-2023/videos/15336.json +++ b/pybay-2023/videos/15336.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T19:00:00", "slug": "Pythons_Types_5_Amazing_Ways_Python_Type_Hints_Will_Supercharge_Your_Code", - "source_url": "https://youtu.be/aKcolk8lGGk", "speakers": [ "Michael Kennedy" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-a62cb4db-2226-4323-85c2-87ffdf33130a" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15337.json b/pybay-2023/videos/15337.json index 7b5451fc4..f31e579ca 100644 --- a/pybay-2023/videos/15337.json +++ b/pybay-2023/videos/15337.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T19:00:00", "slug": "Design_Patterns_for_Data_Pipelines", - "source_url": "https://youtu.be/opR44Tu9p1I", "speakers": [ "Lisa Dusseault" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-4fbeb926-c56e-492f-b552-823b6a08cec2" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15338.json b/pybay-2023/videos/15338.json index 0a7f11bf1..2b00c7f94 100644 --- a/pybay-2023/videos/15338.json +++ b/pybay-2023/videos/15338.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T18:00:00", "slug": "Beyond_Conventional_Embracing_Python_LLMs_for_Quality_Assurance", - "source_url": "https://youtu.be/TSI4rYKpwws", "speakers": [ "Paul Pereyda Karayan" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-637576b0-27e3-4dae-8fc4-b1ad8067664d" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pybay-2023/videos/15339.json b/pybay-2023/videos/15339.json index 9528db7d3..b97707476 100644 --- a/pybay-2023/videos/15339.json +++ b/pybay-2023/videos/15339.json @@ -6,7 +6,6 @@ "quality_notes": null, "recorded": "2023-10-08T16:15:00", "slug": "Contain_Yourself", - "source_url": "https://youtu.be/tI2y7pG6v84", "speakers": [ "Moshe Zadka" ], @@ -25,6 +24,5 @@ "label": "conf", "url": "https://pybay.com/speakers/#sz-speaker-5f034fd1-d671-4c83-b5b9-ba2be78b0835" } - ], - "veyepar_state": 10 + ] } \ No newline at end of file diff --git a/pycon-estonia-2019/videos/contemporary-documentation-juha-matti-santala-pycon-estonia-2019.json b/pycon-estonia-2019/videos/contemporary-documentation-juha-matti-santala-pycon-estonia-2019.json index d70b68c2e..8cff5f842 100644 --- 
a/pycon-estonia-2019/videos/contemporary-documentation-juha-matti-santala-pycon-estonia-2019.json +++ b/pycon-estonia-2019/videos/contemporary-documentation-juha-matti-santala-pycon-estonia-2019.json @@ -1,7 +1,6 @@ { "description": "Juha-Matti aka Juhis is a developer community builder, web developer and programming teacher from Helsinki, Finland. He's passionate about building diverse and welcoming communities, playing board games and having discussions about technology. Currently he is working at Futurice in Helsinki, helping developers become better versions of themselves. In his free time, he loves to tinker with Python, building small scripts to automate the manual bits of his life and building Pokemon-related web applications.\n\n----\n\nGet tickets for PyCon Estonia 2020 - https://bit.ly/pycon2020\n\nPython is nearing 3 decades in existence (yes, kinda hard to believe). Where do we go from here? We have the same question, hence PyCon 2019 will be a peek into the future of Python. Come, hang out with an inspired crowd, have rich, direct conversations and who knows, maybe you\u2019ll even find an answer.\n\nhttps://pycon.ee\nhttps://twitter.com/PyConEstonia\nhttps://www.facebook.com/events/vaba-lava/pycon-estonia-2019/318811955508926/", "duration": 2015, - "published_at": "2019-10-20T04:52:31.000Z", "recorded": "2019-10-03", "speakers": [ "Juha-Matti Santala" diff --git a/pycon-estonia-2019/videos/data-processing-pipelines-with-apache-airflow-python-karl-marka-pycon-estonia-2019.json b/pycon-estonia-2019/videos/data-processing-pipelines-with-apache-airflow-python-karl-marka-pycon-estonia-2019.json index 7edc23134..0441ee0aa 100644 --- a/pycon-estonia-2019/videos/data-processing-pipelines-with-apache-airflow-python-karl-marka-pycon-estonia-2019.json +++ b/pycon-estonia-2019/videos/data-processing-pipelines-with-apache-airflow-python-karl-marka-pycon-estonia-2019.json @@ -2,7 +2,6 @@ "description": "Karl is a data scientist with a biology & bioinformatics background. Having started his professional career in the U.S. biotech scene in Boston, he refocused on the fintech sector in hopes of bigger challenges. He has built up the data science team and the machine learning pipeline for the international consumer finance company Creditstar Group and now leads a data science team at the international credit bureau and data management services provider Creditinfo. Outside of his regular work, he is also a co-founder of the real estate startup Securebadger, where he deals with everything from development to risk models to marketing.\n\n----\n\nGet tickets for PyCon Estonia 2020 - https://bit.ly/pycon2020\n\nPython is nearing 3 decades in existence (yes, kinda hard to believe). Where do we go from here? We have the same question, hence PyCon 2019 will be a peek into the future of Python.
Come, hang out with an inspired crowd, have rich, direct conversations and who knows, maybe you\u2019ll even find an answer.\n\nhttps://pycon.ee\nhttps://twitter.com/PyConEstonia\nhttps://www.facebook.com/events/vaba-lava/pycon-estonia-2019/318811955508926/", "duration": 2429, "language": "eng", - "published_at": "2019-10-19T17:25:14.000Z", "recorded": "2019-10-03", "speakers": [ "Karl M\u00e4rka" diff --git a/pycon-estonia-2019/videos/estonian-natural-language-toolkit-paul-tammo-pycon-estonia-2019.json b/pycon-estonia-2019/videos/estonian-natural-language-toolkit-paul-tammo-pycon-estonia-2019.json index bcf09f0ab..dc0d4ba2c 100644 --- a/pycon-estonia-2019/videos/estonian-natural-language-toolkit-paul-tammo-pycon-estonia-2019.json +++ b/pycon-estonia-2019/videos/estonian-natural-language-toolkit-paul-tammo-pycon-estonia-2019.json @@ -2,7 +2,6 @@ "description": "Paul's background lies in mathematics and computer science. He has used Python for over three years to develop the open-source text processing toolkit EstNLTK and its proprietary extensions, which are used to extract facts and measurements from the health data records at the University of Tartu and STACC. He now works at Fujitsu and uses Python to improve Estonian e-government services.\n\n----\n\nGet tickets for PyCon Estonia 2020 - https://bit.ly/pycon2020\n\nPython is nearing 3 decades in existence (yes, kinda hard to believe). Where do we go from here? We have the same question, hence PyCon 2019 will be a peek into the future of Python. Come, hang out with an inspired crowd, have rich, direct conversations and who knows, maybe you\u2019ll even find an answer.\n\nhttps://pycon.ee\nhttps://twitter.com/PyConEstonia\nhttps://www.facebook.com/events/vaba-lava/pycon-estonia-2019/318811955508926/", "duration": 2210, "language": "eng", - "published_at": "2019-10-19T14:10:35.000Z", "recorded": "2019-10-03", "speakers": [ "Paul Tammo" diff --git a/pycon-estonia-2019/videos/final-words-pycon-estonia-2019-the-future-of-python.json b/pycon-estonia-2019/videos/final-words-pycon-estonia-2019-the-future-of-python.json index d2a2f1a28..45c5c384e 100644 --- a/pycon-estonia-2019/videos/final-words-pycon-estonia-2019-the-future-of-python.json +++ b/pycon-estonia-2019/videos/final-words-pycon-estonia-2019-the-future-of-python.json @@ -2,7 +2,6 @@ "description": "Get tickets for PyCon Estonia 2020 - https://bit.ly/pycon2020\n\nPython is nearing 3 decades in existence (yes, kinda hard to believe). Where do we go from here? We have the same question, hence PyCon 2019 will be a peek into the future of Python.
Come, hang out with an inspired crowd, have rich, direct conversations and who knows, maybe you\u2019ll even find an answer.\n\nhttps://pycon.ee\nhttps://twitter.com/PyConEstonia\nhttps://www.facebook.com/events/vaba-lava/pycon-estonia-2019/318811955508926/", "duration": 531, "language": "eng", - "published_at": "2019-10-20T05:16:49.000Z", "recorded": "2019-10-03", "speakers": [ "Amna Ahsan", diff --git a/pycon-estonia-2019/videos/hacking-and-social-engineering-with-a-70-success-rate-lukas-hurych-pycon-estonia-2019.json b/pycon-estonia-2019/videos/hacking-and-social-engineering-with-a-70-success-rate-lukas-hurych-pycon-estonia-2019.json index ab47835d6..a62df321a 100644 --- a/pycon-estonia-2019/videos/hacking-and-social-engineering-with-a-70-success-rate-lukas-hurych-pycon-estonia-2019.json +++ b/pycon-estonia-2019/videos/hacking-and-social-engineering-with-a-70-success-rate-lukas-hurych-pycon-estonia-2019.json @@ -2,7 +2,6 @@ "description": "Luk\u00e1\u0161 is a product/marketing guy with a development background (Python is his biggest love, but he went through Ruby, PHP, Swift, JavaScript, Java). Studying psychology gave him a great knowledge of marketing, but the combination with Python is a killer for phishing, writing malware, hacking and all the fun stuff. He's doing all of this just as a hobby, trying to test and protect his company and his team.\n\n----\n\nGet tickets for PyCon Estonia 2020 - https://bit.ly/pycon2020\n\nPython is nearing 3 decades in existence (yes, kinda hard to believe). Where do we go from here? We have the same question, hence PyCon 2019 will be a peek into the future of Python. Come, hang out with an inspired crowd, have rich, direct conversations and who knows, maybe you\u2019ll even find an answer.\n\nhttps://pycon.ee\nhttps://twitter.com/PyConEstonia\nhttps://www.facebook.com/events/vaba-lava/pycon-estonia-2019/318811955508926/", "duration": 2357, "language": "eng", - "published_at": "2019-10-19T14:43:49.000Z", "recorded": "2019-10-03", "speakers": [ "Luk\u00e1\u0161 Hurych" diff --git a/pycon-estonia-2019/videos/how-to-make-your-own-cicd-tool-with-python-3-igor-davydenko-pycon-estonia-2019.json b/pycon-estonia-2019/videos/how-to-make-your-own-cicd-tool-with-python-3-igor-davydenko-pycon-estonia-2019.json index 083f392b1..138ffa18c 100644 --- a/pycon-estonia-2019/videos/how-to-make-your-own-cicd-tool-with-python-3-igor-davydenko-pycon-estonia-2019.json +++ b/pycon-estonia-2019/videos/how-to-make-your-own-cicd-tool-with-python-3-igor-davydenko-pycon-estonia-2019.json @@ -2,7 +2,6 @@ "description": "Igor is a web developer from Kyiv, Ukraine, who advocates for using type hints in Python and loves to create web applications. In asyncio I trust.\n\n----\n\nGet tickets for PyCon Estonia 2020 - https://bit.ly/pycon2020\n\nPython is nearing 3 decades in existence (yes, kinda hard to believe). Where do we go from here? We have the same question, hence PyCon 2019 will be a peek into the future of Python.
Come, hang out with an inspired crowd, have rich, direct conversations and who knows, maybe you\u2019ll even find an answer.\n\nhttps://pycon.ee\nhttps://twitter.com/PyConEstonia\nhttps://www.facebook.com/events/vaba-lava/pycon-estonia-2019/318811955508926/", "duration": 2273, "language": "eng", - "published_at": "2019-10-19T14:52:06.000Z", "recorded": "2019-10-03", "speakers": [ "Igor Davydenko" diff --git a/pycon-estonia-2019/videos/intro-pycon-estonia-2019-the-future-of-python.json b/pycon-estonia-2019/videos/intro-pycon-estonia-2019-the-future-of-python.json index d3daee4de..19ba164aa 100644 --- a/pycon-estonia-2019/videos/intro-pycon-estonia-2019-the-future-of-python.json +++ b/pycon-estonia-2019/videos/intro-pycon-estonia-2019-the-future-of-python.json @@ -2,7 +2,6 @@ "description": "Python is nearing 3 decades in existence (yes, kinda hard to believe). Where do we go from here? We have the same question, hence PyCon 2019 will be a peek into the future of Python. Come, hang out with an inspired crowd, have rich, direct conversations and who knows, maybe you\u2019ll even find an answer.\n\n-----\n\nGet tickets for PyCon Estonia 2020 - https://bit.ly/pycon2020\n\nPython is nearing 3 decades in existence (yes, kinda hard to believe). Where do we go from here? We have the same question, hence PyCon 2019 will be a peek into the future of Python. Come, hang out with an inspired crowd, have rich, direct conversations and who knows, maybe you\u2019ll even find an answer.\n\nhttps://pycon.ee\nhttps://twitter.com/PyConEstonia\nhttps://www.facebook.com/events/vaba-lava/pycon-estonia-2019/318811955508926/", "duration": 323, "language": "eng", - "published_at": "2019-10-19T14:04:11.000Z", "recorded": "2019-10-03", "speakers": [ "Amna Ahsan" diff --git a/pycon-estonia-2019/videos/lightning-talks-pycon-estonia-2019.json b/pycon-estonia-2019/videos/lightning-talks-pycon-estonia-2019.json index 3416bcb7f..56d4123e0 100644 --- a/pycon-estonia-2019/videos/lightning-talks-pycon-estonia-2019.json +++ b/pycon-estonia-2019/videos/lightning-talks-pycon-estonia-2019.json @@ -2,7 +2,6 @@ "description": "1. Battery powered Python - Viktor Stiskala\n2. Python visualization - Chris Laffra\n3. Biochemistry of Python - Ken Veski\n4. mPyPI: Monadic Func Pipelines in Python - Dmitry Soshnikov\n5. Generating UnitTests - Chris Laffra\n\n----\n\nGet tickets for PyCon Estonia 2020 - https://bit.ly/pycon2020\n\nPython is nearing 3 decades in existence (yes, kinda hard to believe). Where do we go from here? We have the same question, hence PyCon 2019 will be a peek into the future of Python.
Come, hang out with an inspired crowd, have rich, direct conversations and who knows, maybe you\u2019ll even find an answer.\n\nhttps://pycon.ee\nhttps://twitter.com/PyConEstonia\nhttps://www.facebook.com/events/vaba-lava/pycon-estonia-2019/318811955508926/", "duration": 1374, "language": "eng", - "published_at": "2019-10-19T18:04:26.000Z", "recorded": "2019-10-03", "speakers": [ "Various speakers" diff --git a/pycon-estonia-2019/videos/oxidizing-mypy-why-i-decided-to-learn-rust-and-use-it-with-python-dom-weldon-pycon-estonia-2019.json b/pycon-estonia-2019/videos/oxidizing-mypy-why-i-decided-to-learn-rust-and-use-it-with-python-dom-weldon-pycon-estonia-2019.json index b283e2939..85d8194d5 100644 --- a/pycon-estonia-2019/videos/oxidizing-mypy-why-i-decided-to-learn-rust-and-use-it-with-python-dom-weldon-pycon-estonia-2019.json +++ b/pycon-estonia-2019/videos/oxidizing-mypy-why-i-decided-to-learn-rust-and-use-it-with-python-dom-weldon-pycon-estonia-2019.json @@ -2,7 +2,6 @@ "description": "Dom is a Principal Software Engineer at decisionLab, a mathematical modelling consultancy based in central London. After studying natural sciences at the University of Cambridge and pursuing a PhD in Computational Geography at King's College London, he now heads a team specialising in building production tools out of mathematical models. Dom loves all things code, and is most recently exploring the world of Rust.\n\n----\n\nGet tickets for PyCon Estonia 2020 - https://bit.ly/pycon2020\n\nPython is nearing 3 decades in existence (yes, kinda hard to believe). Where do we go from here? We have the same question, hence PyCon 2019 will be a peek into the future of Python. Come, hang out with an inspired crowd, have rich, direct conversations and who knows, maybe you\u2019ll even find an answer.\n\nhttps://pycon.ee\nhttps://twitter.com/PyConEstonia\nhttps://www.facebook.com/events/vaba-lava/pycon-estonia-2019/318811955508926/", "duration": 2510, "language": "eng", - "published_at": "2019-10-19T14:49:37.000Z", "recorded": "2019-10-03", "speakers": [ "Dom Weldon" diff --git a/pycon-estonia-2019/videos/python-in-its-natural-habitat-lauri-vosandi-pycon-estonia-2019.json b/pycon-estonia-2019/videos/python-in-its-natural-habitat-lauri-vosandi-pycon-estonia-2019.json index 846d8998c..6034c7408 100644 --- a/pycon-estonia-2019/videos/python-in-its-natural-habitat-lauri-vosandi-pycon-estonia-2019.json +++ b/pycon-estonia-2019/videos/python-in-its-natural-habitat-lauri-vosandi-pycon-estonia-2019.json @@ -2,7 +2,6 @@ "description": "Lauri has experience programming in Python for more than 10 years by now. Amongst other things, Lauri has written Certidude in Python, easy-to-use Certificate Authority software which he uses to issue VPN access at the hackerspace he founded in October 2017.\n\n----\n\nGet tickets for PyCon Estonia 2020 - https://bit.ly/pycon2020\n\nPython is nearing 3 decades in existence (yes, kinda hard to believe). Where do we go from here? We have the same question, hence PyCon 2019 will be a peek into the future of Python.
Come, hang out with an inspired crowd, have rich, direct conversations and who knows, maybe you\u2019ll even find an answer.\n\nhttps://pycon.ee\nhttps://twitter.com/PyConEstonia\nhttps://www.facebook.com/events/vaba-lava/pycon-estonia-2019/318811955508926/", "duration": 3324, "language": "eng", - "published_at": "2019-10-20T04:28:04.000Z", "recorded": "2019-10-03", "speakers": [ "Lauri V\u00f5sandi" diff --git a/pycon-estonia-2019/videos/testing-apps-with-third-party-integrations-mike-solomon-pycon-estonia-2019.json b/pycon-estonia-2019/videos/testing-apps-with-third-party-integrations-mike-solomon-pycon-estonia-2019.json index f5f5afcd1..493251d25 100644 --- a/pycon-estonia-2019/videos/testing-apps-with-third-party-integrations-mike-solomon-pycon-estonia-2019.json +++ b/pycon-estonia-2019/videos/testing-apps-with-third-party-integrations-mike-solomon-pycon-estonia-2019.json @@ -2,7 +2,6 @@ "description": "Mike Solomon is the C.E.O. of Meeshkan, a company based in Helsinki, Finland, that makes tools for developers. Mike is a Python, JavaScript, Haskell, C++ and Scheme developer who cut his teeth on the LilyPond project and has since worked on numerous projects ranging from open source to large commercial products. In the Python world, his main interests are testing, SMT solvers, and machine learning.\n\n----\n\nGet tickets for PyCon Estonia 2020 - https://bit.ly/pycon2020\n\nPython is nearing 3 decades in existence (yes, kinda hard to believe). Where do we go from here? We have the same question, hence PyCon 2019 will be a peek into the future of Python. Come, hang out with an inspired crowd, have rich, direct conversations and who knows, maybe you\u2019ll even find an answer.\n\nhttps://pycon.ee\nhttps://twitter.com/PyConEstonia\nhttps://www.facebook.com/events/vaba-lava/pycon-estonia-2019/318811955508926/", "duration": 2139, "language": "eng", - "published_at": "2019-10-19T16:33:39.000Z", "recorded": "2019-10-03", "speakers": [ "Mike Solomon" diff --git a/pycon-estonia-2019/videos/the-past-present-and-future-of-extending-python-travis-oliphant-pycon-estonia-2019.json b/pycon-estonia-2019/videos/the-past-present-and-future-of-extending-python-travis-oliphant-pycon-estonia-2019.json index af7bb7135..5582b4b98 100644 --- a/pycon-estonia-2019/videos/the-past-present-and-future-of-extending-python-travis-oliphant-pycon-estonia-2019.json +++ b/pycon-estonia-2019/videos/the-past-present-and-future-of-extending-python-travis-oliphant-pycon-estonia-2019.json @@ -2,7 +2,6 @@ "description": "Travis E. Oliphant is a Founder and CEO/CTO of Quansight, a consulting company that connects companies with open-source communities to help them gain actionable quantitative insight from their data. Travis previously co-founded Anaconda Inc. and is now a Director. Since 1997, he has worked in the Python ecosystem, notably as the primary creator of the NumPy package and as a founding contributor of the SciPy package. Travis also started the Numba project and organized and led the teams that built Conda, Dask, Bokeh, and XND. Travis has a Ph.D. from the Mayo Clinic and B.S. and M.S. degrees in Mathematics and Electrical Engineering from Brigham Young University.\n\n----\n\nGet tickets for PyCon Estonia 2020 - https://bit.ly/pycon2020\n\nPython is nearing 3 decades in existence (yes, kinda hard to believe). Where do we go from here? We have the same question, hence PyCon 2019 will be a peek into the future of Python.
Come, hang out with an inspired crowd, have rich, direct conversations and who knows, maybe you\u2019ll even find an answer.\n\nhttps://pycon.ee\nhttps://twitter.com/PyConEstonia\nhttps://www.facebook.com/events/vaba-lava/pycon-estonia-2019/318811955508926/", "duration": 3457, "language": "eng", - "published_at": "2019-10-20T06:26:50.000Z", "recorded": "2019-10-03", "speakers": [ "Travis Oliphant" diff --git a/pycon-sk-2019/videos/adam-stevko-be-a-good-colleague-and-help-your-security-engineer.json b/pycon-sk-2019/videos/adam-stevko-be-a-good-colleague-and-help-your-security-engineer.json index bb79b727f..83daa8546 100644 --- a/pycon-sk-2019/videos/adam-stevko-be-a-good-colleague-and-help-your-security-engineer.json +++ b/pycon-sk-2019/videos/adam-stevko-be-a-good-colleague-and-help-your-security-engineer.json @@ -1,7 +1,6 @@ { "description": "2018 was the year of GDPR. Security is a shared responsibility and has\nbecome more important than ever before. Nevertheless, not everybody\nrealizes this, and security incidents and problems continue to occur,\nthus resulting in catastrophic consequences for your business. In this\ntalk, I will be sharing how one can utilize best practices to deliver\nless vulnerable code by using tools to prevent security holes, how to\nhandle incidents from the developers\u2019 point of view and how you can\nassist your Security Engineers. By following these guidelines, you will\nmake your Security Engineers happy and your code more secure.\n", "duration": 2597, - "published_at": "2019-06-14T13:23:54.000Z", "recorded": "2019-03-22", "speakers": [ "Adam \u0160tevko" diff --git a/pycon-sk-2019/videos/anton-caceres-avoiding-macro-trouble-of-micro-services.json b/pycon-sk-2019/videos/anton-caceres-avoiding-macro-trouble-of-micro-services.json index 2688304d6..6f923848e 100644 --- a/pycon-sk-2019/videos/anton-caceres-avoiding-macro-trouble-of-micro-services.json +++ b/pycon-sk-2019/videos/anton-caceres-avoiding-macro-trouble-of-micro-services.json @@ -2,7 +2,6 @@ "description": "Any change in software architecture is a significant time investment.\nWriting microservices in Python is a joy, but when you decide on it,\nthere is often no way back. Therefore it is always an advantage to know\nwhat to expect in advance, not just from inspiring blog posts but also\nfrom the harsh reality.\n\nHaving been through a number of smooth as well as tricky microservice migrations in\nthe past, I would like to share the typical pitfalls of choosing a framework\nstack, communication protocol, conventions, and deployment process \u2014 all\ncovered by real projects.\n", "duration": 1678, "language": "eng", - "published_at": "2019-06-14T13:27:15.000Z", "recorded": "2019-03-22", "speakers": [ "Anton Caceres" diff --git a/pycon-sk-2019/videos/christian-barra-lets-talk-about-mlops.json b/pycon-sk-2019/videos/christian-barra-lets-talk-about-mlops.json index dfa5b30cd..6c4c3e928 100644 --- a/pycon-sk-2019/videos/christian-barra-lets-talk-about-mlops.json +++ b/pycon-sk-2019/videos/christian-barra-lets-talk-about-mlops.json @@ -2,7 +2,6 @@ "description": "The quantity of hype around machine learning and AI is probably second\nonly to Bitcoin and blockchains. But until a machine learning model is\ndeployed to production, the value delivered to companies is approximately\nequal to zero.
This, together with the common mantra that data science can't\nuse agile/lean frameworks or that best software engineering\npractices don't apply, explains a lot about why companies often got burnt\nwith their data science projects and why they generally under-delivered.\n\nMLOps is here to help, the machine-learning equivalent of DevOps: it\nsolves the problems of implementing machine-learning in production.\nDuring this talk, I will introduce the data science lifecycle, the\nconcept of machine learning Ops, its characteristics, why it is urgently\nneeded, how it compares to DevOps, how it will become a required\ncapability for any DS/ML/AI team, the tools available, and how you can\nget started with it.\n", "duration": 1628, "language": "eng", - "published_at": "2019-06-14T16:00:20.000Z", "recorded": "2019-03-22", "speakers": [ "Christian Barra" diff --git a/pycon-sk-2019/videos/christoph-ritzer-intro-to-blockchain-with-python.json b/pycon-sk-2019/videos/christoph-ritzer-intro-to-blockchain-with-python.json index c761b4683..30f4fe599 100644 --- a/pycon-sk-2019/videos/christoph-ritzer-intro-to-blockchain-with-python.json +++ b/pycon-sk-2019/videos/christoph-ritzer-intro-to-blockchain-with-python.json @@ -2,7 +2,6 @@ "description": "This talk is structured into two parts of approximately equal length:\n\nFirst, an introduction to blockchains in general. Most of us have heard about\nblockchains or Bitcoin, but how do they actually work and what is the\ntechnology driving them? This talk will explain what a cryptocurrency\nwallet is, how transactions are created and how they are brought\ntogether to form blocks that build the blockchain. I will outline\ndifferences between the most widely used implementations and the core\ntechnological concepts behind them, like Proof-of-Work versus\nProof-of-Stake, consensus algorithms, trustless systems and the ideas\nbehind them.\n\nIn the second half of the talk, I draw on my experience working with\nPython in my professional career and give an overview of the Python\necosystem for blockchain and cryptocurrency development. I will showcase\nsome of the libraries and tools that exist, like btcpy, and highlight\nsome of the pros and cons (mostly pros) of using Python in this sector.\n", "duration": 1372, "language": "eng", - "published_at": "2019-08-06T12:22:02.000Z", "recorded": "2019-03-24", "speakers": [ "Christoph Ritzer" diff --git a/pycon-sk-2019/videos/dmitry-dygalo-testing-network-interactions-in-python.json b/pycon-sk-2019/videos/dmitry-dygalo-testing-network-interactions-in-python.json index b053c11d7..7596c9ba3 100644 --- a/pycon-sk-2019/videos/dmitry-dygalo-testing-network-interactions-in-python.json +++ b/pycon-sk-2019/videos/dmitry-dygalo-testing-network-interactions-in-python.json @@ -2,7 +2,6 @@ "description": "Many modern Python applications interact with 3rd parties over the network,\nand packages like requests and aiohttp are incredibly popular nowadays.\nThis makes it essential to know how to test network interactions. It is\nparticularly important for microservices - to connect them you need to\ndo network calls.\n\nThis talk will walk from the most straightforward ad-hoc solutions to more\ntunable and extensible ones.
You\u2019ll learn how to approach testing\nnetwork interactions at different isolation levels in sync and async code, take away\nideas and practices for working with popular Python tools like responses,\npook, VCR, and pytest.\n", "duration": 1548, "language": "eng", - "published_at": "2019-08-06T12:07:27.000Z", "recorded": "2019-03-23", "speakers": [ "Dmitry Dygalo" diff --git a/pycon-sk-2019/videos/edusummit-2019-aftermovie-pycon-sk.json b/pycon-sk-2019/videos/edusummit-2019-aftermovie-pycon-sk.json index daad1260e..588b052d1 100644 --- a/pycon-sk-2019/videos/edusummit-2019-aftermovie-pycon-sk.json +++ b/pycon-sk-2019/videos/edusummit-2019-aftermovie-pycon-sk.json @@ -1,7 +1,6 @@ { "description": "EduSummit 2019 Aftermovie - PyCon SK", "duration": 178, - "published_at": "2019-08-06T11:51:47.000Z", "recorded": "2019-03-22", "speakers": [ "Various speakers" diff --git a/pycon-sk-2019/videos/filip-stefanak-stability-with-a-hockey-stick.json b/pycon-sk-2019/videos/filip-stefanak-stability-with-a-hockey-stick.json index f9a088f5e..5b67069bd 100644 --- a/pycon-sk-2019/videos/filip-stefanak-stability-with-a-hockey-stick.json +++ b/pycon-sk-2019/videos/filip-stefanak-stability-with-a-hockey-stick.json @@ -2,7 +2,6 @@ "description": "\"Move fast and break things\" used to be a popular motto at Facebook, but\nat some point it needed to change. How can a fast-growing SaaS company\nlike Exponea handle the ever-increasing need for service stability? We\nwill take a quick look at all the bazillion things that must be done,\npanic, and talk about how they can actually be done.\n", "duration": 1397, "language": "eng", - "published_at": "2019-06-14T16:02:57.000Z", "recorded": "2019-03-22", "speakers": [ "Filip \u0160tefa\u0148\u00e1k" diff --git a/pycon-sk-2019/videos/frantisek-benko-we-run-huge-in-memory-databases-in-gke-and-we-love-it.json b/pycon-sk-2019/videos/frantisek-benko-we-run-huge-in-memory-databases-in-gke-and-we-love-it.json index 03fdf87d0..56f6b2ba8 100644 --- a/pycon-sk-2019/videos/frantisek-benko-we-run-huge-in-memory-databases-in-gke-and-we-love-it.json +++ b/pycon-sk-2019/videos/frantisek-benko-we-run-huge-in-memory-databases-in-gke-and-we-love-it.json @@ -2,7 +2,6 @@ "description": "We invite you to a talk about the largest-scale change in the history of\nExponea. We'll show you how we managed to run multi-terabyte sharded\nin-memory DBs in an environment of ephemeral containers, hundreds of\nPython and Go workers in multiple Kubernetes namespaces and several\nhundred VMs in Google Cloud. You won't want to miss our way of\nautomating this system, either.\n", "duration": 2750, "language": "eng", - "published_at": "2019-08-06T11:13:39.000Z", "recorded": "2019-03-23", "speakers": [ "Franti\u0161ek Benko" diff --git a/pycon-sk-2019/videos/gabriel-lachmann-jan-suchal-ako-prestat-kradnut-v-statnom-it.json b/pycon-sk-2019/videos/gabriel-lachmann-jan-suchal-ako-prestat-kradnut-v-statnom-it.json index 41b5b72f9..7a874c7a8 100644 --- a/pycon-sk-2019/videos/gabriel-lachmann-jan-suchal-ako-prestat-kradnut-v-statnom-it.json +++ b/pycon-sk-2019/videos/gabriel-lachmann-jan-suchal-ako-prestat-kradnut-v-statnom-it.json @@ -1,7 +1,6 @@ { "description": "For over three years now, Slovensko.Digital has been intervening in large state IT\nprojects and in the digitalization of public administration as such. You will learn\nwhether it was actually good for anything, what will happen next in digitalization,\nand what an ordinary programmer and the IT community can do about it.
We will tell you a bit\nmore about the EU-funds project that was approved for us under Kali\u0148\u00e1k, and how\nwe are using this money to make life hard for thieves and constructively\nimprove state IT. We will also share positive news on topics such as Open API\nand My Data, which could push digitalization so far forward that even the\nEstonians will envy us. If everything goes according to plan, not only our\nfuture grandchildren should live to see it.\n", "duration": 3051, - "published_at": "2019-07-01T22:34:54.000Z", "recorded": "2019-03-22", "speakers": [ "Gabriel Lachmann", diff --git a/pycon-sk-2019/videos/hans-christian-fessl-think-about-the-user.json b/pycon-sk-2019/videos/hans-christian-fessl-think-about-the-user.json index 049b6b891..b1493752b 100644 --- a/pycon-sk-2019/videos/hans-christian-fessl-think-about-the-user.json +++ b/pycon-sk-2019/videos/hans-christian-fessl-think-about-the-user.json @@ -2,7 +2,6 @@ "description": "As developers, we are mainly focused on making things work. Business\nlogic and process analysis, reinventing the wheel and questioning every\nline of code are what we do best. But when it comes to making these very\ncomplex systems work for users, we are quickly frustrated, because it\nfeels like they just don't get it. As people of code, we are no longer\nregular users because we understand what is going on behind the curtains\nof the big show that is called a User Interface. This means that, by\ndefault, users have a totally different experience than we developers\ndo. In this talk, I want to extend my lightning talk from last year to\nhalf an hour, to give you some tips on how to get a user's perspective on\nthe user interface you want or have to create.\n", "duration": 2057, "language": "eng", - "published_at": "2019-08-06T12:16:51.000Z", "recorded": "2019-03-24", "speakers": [ "Hans Christian Fe\u00dfl" diff --git a/pycon-sk-2019/videos/helen-li-nucleus-an-open-source-library-for-genomics-data-and-machine-learning.json b/pycon-sk-2019/videos/helen-li-nucleus-an-open-source-library-for-genomics-data-and-machine-learning.json index 9b16a9905..53a6bf3aa 100644 --- a/pycon-sk-2019/videos/helen-li-nucleus-an-open-source-library-for-genomics-data-and-machine-learning.json +++ b/pycon-sk-2019/videos/helen-li-nucleus-an-open-source-library-for-genomics-data-and-machine-learning.json @@ -2,7 +2,6 @@ "description": "Nucleus is a Python library designed to make it easy to read, write, and\nanalyze genomics data in common bioinformatics file formats such as SAM\nand VCF. In addition, Nucleus enables seamless integration with the\nTensorFlow machine learning (ML) framework. Nucleus is heavily used in\nDeepVariant, a state-of-the-art convolutional neural network variant\ncaller, and in other ML projects at Google AI Genomics.
This talk will\ngive an overview of Nucleus, its features, and its APIs.\n", "duration": 1709, "language": "eng", - "published_at": "2019-06-30T20:49:18.000Z", "recorded": "2019-03-23", "speakers": [ "Helen Li" diff --git a/pycon-sk-2019/videos/honza-kral-so-you-want-to-be-an-engineer.json b/pycon-sk-2019/videos/honza-kral-so-you-want-to-be-an-engineer.json index 9b23e7b13..9f5fd9aa5 100644 --- a/pycon-sk-2019/videos/honza-kral-so-you-want-to-be-an-engineer.json +++ b/pycon-sk-2019/videos/honza-kral-so-you-want-to-be-an-engineer.json @@ -2,7 +2,6 @@ "description": "Software Engineering is a new field; have we figured out all the ins and\nouts of how best to work in it?\n\nFor a lot of people the technology field has always been a semi-magical\nplace where engineers use their arcane knowledge to tease the secrets\nfrom the computer, and unfortunately we have bought into this myth... Let\nus examine some of the myths surrounding our field and ask which\naspects might actually be useful. Is SW engineering an art or just\nanother skill? Is talent required? Does it even matter? What about the\n10X engineer...?\n\nI will be sharing some insights that I have learned from trying to\nanswer these questions for myself, part based on research, part on\npersonal opinion.\n", "duration": 1774, "language": "eng", - "published_at": "2019-07-01T22:30:38.000Z", "recorded": "2019-03-23", "speakers": [ "Honza Kr\u00e1l" diff --git a/pycon-sk-2019/videos/ingrid-budau-the-apprentices-enthusiastic-guide-to-pandas.json b/pycon-sk-2019/videos/ingrid-budau-the-apprentices-enthusiastic-guide-to-pandas.json index bca9caa5f..19ac043e8 100644 --- a/pycon-sk-2019/videos/ingrid-budau-the-apprentices-enthusiastic-guide-to-pandas.json +++ b/pycon-sk-2019/videos/ingrid-budau-the-apprentices-enthusiastic-guide-to-pandas.json @@ -2,7 +2,6 @@ "description": "The Pandas soon realized there's no way they are going to survive the\nordeals and hardships of this world, if they didn't finally and without\nthe blink of an eye of hesitation pull themselves together, stop being\nthe lazy fluffy beings they have long been known for, and start\nreorganizing their lives ASAP. They needed a fresh view over the world\nand its intrinsic mechanisms, light had to be shed upon the information\nthey possessed about survival, in a few words, they had to start over.\nThis is how in the midst of the forest a highly performant library was\ncoming to life, whose powerful toolkit would grant them a long-lasting\nlife of happiness and joy. This long-dreamed-of library should import the\ninformation they have been gathering about the world for long gone\ncenturies and help them look at it through different eyes. They wanted\nto structure their world views and beliefs into sensible types and\ncategories, remove from their genes their procrastinative behavioural\npatterns, drop them altogether. After laborious efforts of dealing with\nmissing data about their surroundings, grouping and counting the\nmeaningful rest, filtering the nonsensical superstitions, they could\nfinally and, without doubt, point out with precision, where the bamboo\nsprouts were freshest, most succulent, fiber-rich, absolutely\nscrumptious and the moment of the year, dictated by the moon calendar,\nwhen they are fluffiest, cosiest, most willing and irresistibly fall for\none another and cuddle up.
They put all this secret survival kit into\neasily understandable pictures and graphs for the dreamers among them,\nwho weren't prepared to put in all the effort of learning all those\ncomplicated symbols, just in order to survive and just wanted to admire\nthe sky goddess, the moon. But wait, they didn't have a name for their\ngrandiose library, so they just wanted to make a statement of being the\nmost diligent creature of them all and called it, simply and\nunmistakably, pandas!\n", "duration": 1542, "language": "eng", - "published_at": "2019-08-06T12:18:06.000Z", "recorded": "2019-03-24", "speakers": [ "Ingrid Budau" diff --git a/pycon-sk-2019/videos/jakub-balas-using-python-in-new-space-industry-is-not-a-rocket-science.json b/pycon-sk-2019/videos/jakub-balas-using-python-in-new-space-industry-is-not-a-rocket-science.json index c570f8d0c..983047654 100644 --- a/pycon-sk-2019/videos/jakub-balas-using-python-in-new-space-industry-is-not-a-rocket-science.json +++ b/pycon-sk-2019/videos/jakub-balas-using-python-in-new-space-industry-is-not-a-rocket-science.json @@ -2,7 +2,6 @@ "description": "What we do at Iceye and how Python is allowing us to grow fast and be\nunique.\n", "duration": 2203, "language": "eng", - "published_at": "2019-06-14T13:20:29.000Z", "recorded": "2019-03-22", "speakers": [ "Jakub Balas" diff --git a/pycon-sk-2019/videos/jakub-mertus-automaticka-korekcia-pisaneho-prejavu-prirodzeneho-jazyka.json b/pycon-sk-2019/videos/jakub-mertus-automaticka-korekcia-pisaneho-prejavu-prirodzeneho-jazyka.json index 5acd76de5..e40a210a0 100644 --- a/pycon-sk-2019/videos/jakub-mertus-automaticka-korekcia-pisaneho-prejavu-prirodzeneho-jazyka.json +++ b/pycon-sk-2019/videos/jakub-mertus-automaticka-korekcia-pisaneho-prejavu-prirodzeneho-jazyka.json @@ -1,7 +1,6 @@ { "description": "The bank has a large volume of unstructured data at its disposal. To use it,\nthe data must first be cleaned. We are therefore developing a module for correcting\ngrammatical errors and typos, which consists of three main parts.\nThe first part is a so-called \"re-diacritizer\" aimed at restoring\ndiacritics in words that lack them. The second part is the character-level SymSpell model, which\nsuggests possible corrections of a word based on the Damerau-Levenshtein metric.\nThe third part uses the well-known Word2Vec model to capture the contextual\ndependencies of words in the relevant document. Development of the module is not yet\nfinished, but a subjective assessment of the module indicates its usability in\nthe specific banking environment.\n", "duration": 1277, - "published_at": "2019-08-06T12:12:44.000Z", "recorded": "2019-03-24", "speakers": [ "Jakub Mertus" diff --git a/pycon-sk-2019/videos/jakub-sedinar-odoo.json b/pycon-sk-2019/videos/jakub-sedinar-odoo.json index 102daa9dd..4d788e00f 100644 --- a/pycon-sk-2019/videos/jakub-sedinar-odoo.json +++ b/pycon-sk-2019/videos/jakub-sedinar-odoo.json @@ -1,7 +1,6 @@ { "description": "Odoo in Slovakia - a game changer for the Slovak ERP landscape. We want to\ntell and show you what Odoo is, how it works, and why it is better than other\nsolutions.
What it focuses on, and what the architecture of the solution looks like...\n", "duration": 1752, - "published_at": "2019-06-14T13:16:00.000Z", "recorded": "2019-03-22", "speakers": [ "Jakub \u0160edin\u00e1r" diff --git a/pycon-sk-2019/videos/joel-lord-i-dont-care-about-security-and-neither-should-you.json b/pycon-sk-2019/videos/joel-lord-i-dont-care-about-security-and-neither-should-you.json index d63a62831..14dabdfc8 100644 --- a/pycon-sk-2019/videos/joel-lord-i-dont-care-about-security-and-neither-should-you.json +++ b/pycon-sk-2019/videos/joel-lord-i-dont-care-about-security-and-neither-should-you.json @@ -2,7 +2,6 @@ "description": "Remember when setting up a login page was easy? It seems like nowadays\nit can take weeks to start a project -- creating a signup form, a login\nform, a password recovery screen, and all the validation in between. And\nyou haven't even started on security considerations yet. During this\npresentation, the attendees will be introduced to OpenID Connect and\nOAuth. They will also learn how to leverage these technologies to create\nmore secure applications. Most importantly, they will learn how to\ndelegate authorization and authentication so they can focus on their\nreal work and forget about all that security stuff.\n\nBesides this talk, Joel is planning a workshop on building a passwordless\nauthentication server.\n", "duration": 1842, "language": "eng", - "published_at": "2019-06-30T20:34:43.000Z", "recorded": "2019-03-23", "speakers": [ "Joel Lord" diff --git a/pycon-sk-2019/videos/jorge-torres-machine-learning-democratization.json b/pycon-sk-2019/videos/jorge-torres-machine-learning-democratization.json index 70f768ff5..ff20dfafc 100644 --- a/pycon-sk-2019/videos/jorge-torres-machine-learning-democratization.json +++ b/pycon-sk-2019/videos/jorge-torres-machine-learning-democratization.json @@ -2,7 +2,6 @@ "description": "We can't expect Machine Learning experts to be both domain experts\nacross various fields and also true experts in ML. It is crucial to\nrethink how we build tools so that we can provide Machine Learning\ncapabilities to experts in various disciplines who are not necessarily\nsavvy in machine learning, but are experts in their own domains, so that\nthey too can become participants in the data science community.\n\nThis talk is divided into three sections:\n\nThe first one dives into the importance of democratizing Machine\nLearning; its objective is to demonstrate why this is now a crucial\nissue to be solved and to show the risks and problems that present\nthemselves when Machine Learning Engineers take on the responsibility of\nbuilding predictive technologies in domains where they are not necessarily\nthe most expert.\n\nThe second part of this talk focuses on some solutions and approaches to\nthe challenges of democratizing Machine Learning, the journey and\nresults we have seen at MindsDB in this endeavor, as well as new ways that the ML community can think about the next generation\nof tools being built.\n\nThe third and most extensive section of this talk focuses on the new\nissues that are born once Machine Learning capabilities are in the hands\nof non-ML experts and, more importantly, the implications of delegating the\nMachine Learning machinery to a system, and what is important in order\nto trust those systems.
Which leads to the importance in further\ndeveloping our understanding of explainability, interpretability, and\nrobustness of ML.\n", "duration": 1225, "language": "eng", - "published_at": "2019-06-14T13:00:20.000Z", "recorded": "2019-03-22", "speakers": [ "Jorge Torres" diff --git a/pycon-sk-2019/videos/juraj-hromkovic-programovanie-s-logo-filozofiou-ako-model-vseobecneho-vzdelavania-pre-vsetky-odbory.json b/pycon-sk-2019/videos/juraj-hromkovic-programovanie-s-logo-filozofiou-ako-model-vseobecneho-vzdelavania-pre-vsetky-odbory.json index 61e1621d4..404661fa5 100644 --- a/pycon-sk-2019/videos/juraj-hromkovic-programovanie-s-logo-filozofiou-ako-model-vseobecneho-vzdelavania-pre-vsetky-odbory.json +++ b/pycon-sk-2019/videos/juraj-hromkovic-programovanie-s-logo-filozofiou-ako-model-vseobecneho-vzdelavania-pre-vsetky-odbory.json @@ -1,7 +1,6 @@ { "description": "Jean Piaget pri\u0161iel s konceptom \"learning by doing\" s cie\u013eom zv\u00fd\u0161i\u0165 motiv\u00e1ciu a trvanlivos\u0165 vedomost\u00ed a preh\u013abi\u0165 ch\u00e1panie \u0161tudovan\u00fdch objektov v\u010faka vlastnej aktivite v procese u\u010denia sa. Seymour Papert obohatil tento koncept o \"getting things to work\" vo svojej implement\u00e1cii v\u00fdu\u010dby programovania v programovacom jazyku LOGO. My sme na ETH skombinovali tento pr\u00edstup s konceptom \"critical thinking\", ktor\u00fd odmieta vyu\u010dova\u0165 hotov\u00e9 produkty vedy (fakty, modely, met\u00f3dy,...), ale vy\u017eaduje si u\u010di\u0165 procesy ich vytv\u00e1rania. Pre pr\u00edpravu vzdel\u00e1vania tis\u00edcky u\u010dite\u013eov z\u00e1kladn\u00fdch a stredn\u00fdch \u0161k\u00f4l pre v\u00fdu\u010dbu programovania, sme vyvinuli v r\u00e1mci t\u00fdchto v\u00fdu\u010dbov\u00fdch konceptov prostredia na v\u00fdu\u010dbu programovania, ako aj in\u00fdch informatick\u00fdch t\u00e9m a nap\u00edsali k nim s\u00e9riu u\u010debn\u00edc. V tejto predn\u00e1\u0161ke sa budeme venova\u0165 prostrediam TigerJython (dialekt Pythonu) a viacer\u00fdm verzi\u00e1m jazyka LOGO (textov\u00fdm i blokov\u00fdm) a vysvetl\u00edme proces ich tvorby z poh\u013eadu potrieb v\u00fdu\u010dby programovania. Predstav\u00edme aj informatiku ako viac ne\u017e 5000 rokov star\u00fa vedu a ozrejm\u00edme jej pr\u00ednos pre v\u0161eobecn\u00e9 vzdelanie a jej nenahradite\u013enos\u0165 vo vzdel\u00e1van\u00ed bud\u00facich gener\u00e1cii.", "duration": 4781, - "published_at": "2019-06-27T15:11:05.000Z", "recorded": "2019-03-22", "speakers": [ "Juraj Hromkovi\u010d" diff --git a/pycon-sk-2019/videos/kalyan-dikshit-smart-homes-tor-ssh-secure-smart-home.json b/pycon-sk-2019/videos/kalyan-dikshit-smart-homes-tor-ssh-secure-smart-home.json index f0b3e364e..fbd4e32e6 100644 --- a/pycon-sk-2019/videos/kalyan-dikshit-smart-homes-tor-ssh-secure-smart-home.json +++ b/pycon-sk-2019/videos/kalyan-dikshit-smart-homes-tor-ssh-secure-smart-home.json @@ -2,7 +2,6 @@ "description": "\u201cInternet of Things,\u201d \u201cSmart Devices,\u201d \u201cSmart Homes.\u201d people are\nfascinated with them and their usage. But the most important things they\ntend to forget are \u201cSecurity & Privacy\u201d. 
Using Tor to divert prying eyes\naway from our connected homes.\n", "duration": 1701, "language": "eng", - "published_at": "2019-08-06T12:24:27.000Z", "recorded": "2019-03-24", "speakers": [ "Kalyan Dikshit" diff --git a/pycon-sk-2019/videos/laurent-pickard-building-smarter-solutions-with-machine-learning-from-magic-to-reality.json b/pycon-sk-2019/videos/laurent-pickard-building-smarter-solutions-with-machine-learning-from-magic-to-reality.json index 26bc9447b..707b2fd45 100644 --- a/pycon-sk-2019/videos/laurent-pickard-building-smarter-solutions-with-machine-learning-from-magic-to-reality.json +++ b/pycon-sk-2019/videos/laurent-pickard-building-smarter-solutions-with-machine-learning-from-magic-to-reality.json @@ -2,7 +2,6 @@ "description": "\u201cAny sufficiently advanced technology is indistinguishable from magic.\u201d\nWell, Machine Learning can look like magic, but you don't need to be a\ndata scientist or an ML researcher to develop with ML.\n\nSo, what about making your solution smarter without any knowledge in AI?\nWith pre-trained models and a few lines of code, Machine Learning APIs\ncan analyze your data. Moreover, AutoML techniques can now help in\ngetting even more specific insights tailored to your needs.\n\nIn this session, you'll see how to transform or extract information from\ntext, image, audio & video with the latest ML APIs, how to train an\nAutoML custom model, and you'll be an active player of a live demo.\nDon't put your smartphone in airplane mode!\n", "duration": 3254, "language": "eng", - "published_at": "2019-06-30T20:20:03.000Z", "recorded": "2019-03-23", "speakers": [ "Laurent Picard" diff --git a/pycon-sk-2019/videos/lightning-talks.json b/pycon-sk-2019/videos/lightning-talks.json index 6602362bf..bd4422587 100644 --- a/pycon-sk-2019/videos/lightning-talks.json +++ b/pycon-sk-2019/videos/lightning-talks.json @@ -2,7 +2,6 @@ "description": "A lightning talk is a very short presentation lasting only a few minutes, given at a conference or similar forum. Several lightning talks will usually be delivered by different speakers in a single session.", "duration": 2766, "language": "eng", - "published_at": "2019-08-06T11:18:46.000Z", "recorded": "2019-03-22", "speakers": [ "Various speakers" diff --git a/pycon-sk-2019/videos/luke-spademan-controling-a-robotic-arm-with-microbits.json b/pycon-sk-2019/videos/luke-spademan-controling-a-robotic-arm-with-microbits.json index 7332c8f99..82f70f89c 100644 --- a/pycon-sk-2019/videos/luke-spademan-controling-a-robotic-arm-with-microbits.json +++ b/pycon-sk-2019/videos/luke-spademan-controling-a-robotic-arm-with-microbits.json @@ -2,7 +2,6 @@ "description": "Captivating a group of children for a sustained period of time is\nnotoriously hard. I will be exploring with you how the micro:bit can be\nused to engage a young audience with interactive demos and programming\nactivities.\n\nChildren love to physically interact with hardware. That is what is\ngreat about the micro:bit. It not only has built-in LEDs and buttons but\ncan also control bigger, more exciting electronics like robotic arms.\nThis allows for a setup that gives students of all ability levels a\nchallenge. Students that are beginners to programming can be given a\nmore complete program or walk through a worksheet that takes them\nthrough the process of controlling the device (in this case a robotic\narm) step by step. 
Students that have higher programming skills can be\ngiven a more bare-bones / boilerplate source file and can write code to\ncontrol the arm with less guidance.\n", "duration": 1008, "language": "eng", - "published_at": "2019-08-06T12:10:17.000Z", "recorded": "2019-03-22", "speakers": [ "Luke Spademan" diff --git a/pycon-sk-2019/videos/manoj-pandey-gotchas-and-landmines-in-python.json b/pycon-sk-2019/videos/manoj-pandey-gotchas-and-landmines-in-python.json index cd457493c..a37a5adbc 100644 --- a/pycon-sk-2019/videos/manoj-pandey-gotchas-and-landmines-in-python.json +++ b/pycon-sk-2019/videos/manoj-pandey-gotchas-and-landmines-in-python.json @@ -2,7 +2,6 @@ "description": "Python may be one of the simplest and most flexible programming\nlanguages out there, but it is still a programming language. It still\nhas syntax, datatypes, and some occasional dark corners. Python \u201cwarts\u201d\nare things for which people have criticised Python, typically aspects of\nthe language or mechanisms of its implementation, because such aspects\neither expose certain surprising inconsistencies, are regarded as\nomissions, or cause irritation for parts of the community in some sense.\nThis talk will be about common pitfalls (termed warts/landmines) that\npeople face when using the Python programming language.\n\nThe only pre-requisite is a basic familiarity with the Python programming\nlanguage. Although it\u2019ll be good if you have a laptop with Python\nalready set up, it\u2019s not required as such!\n", "duration": 1891, "language": "eng", - "published_at": "2019-08-06T11:58:43.000Z", "recorded": "2019-03-23", "speakers": [ "Manoj Pandey" diff --git a/pycon-sk-2019/videos/markus-holtermann-less-obvious-things-to-do-with-djangos-orm.json b/pycon-sk-2019/videos/markus-holtermann-less-obvious-things-to-do-with-djangos-orm.json index ff89e5ecf..73accc59d 100644 --- a/pycon-sk-2019/videos/markus-holtermann-less-obvious-things-to-do-with-djangos-orm.json +++ b/pycon-sk-2019/videos/markus-holtermann-less-obvious-things-to-do-with-djangos-orm.json @@ -2,7 +2,6 @@ "description": "Django\u2019s ORM is powerful. And it has been for ages. It only became even\nmore powerful over the last few years. Django\u2019s ORM can be intimidating\nbecause it can do so many things. 
And it can also be confusing when\ntrying to find out how to do things.\n", "duration": 2135, "language": "eng", - "published_at": "2019-06-30T20:15:08.000Z", "recorded": "2019-03-23", "speakers": [ "Markus Holtermann" diff --git a/pycon-sk-2019/videos/martin-strycek-your-flight-ticket-is-just-the-tip-of-the-iceberg.json b/pycon-sk-2019/videos/martin-strycek-your-flight-ticket-is-just-the-tip-of-the-iceberg.json index 7fc3feaed..374f78c31 100644 --- a/pycon-sk-2019/videos/martin-strycek-your-flight-ticket-is-just-the-tip-of-the-iceberg.json +++ b/pycon-sk-2019/videos/martin-strycek-your-flight-ticket-is-just-the-tip-of-the-iceberg.json @@ -2,7 +2,6 @@ "description": "Your flight ticket is just the tip of the iceberg!", "duration": 1612, "language": "eng", - "published_at": "2019-08-06T11:15:32.000Z", "recorded": "2019-03-22", "speakers": [ "Martin Str\u00fd\u010dek" diff --git a/pycon-sk-2019/videos/meredydd-luff-anvil-full-stack-web-apps-with-nothing-but-python.json b/pycon-sk-2019/videos/meredydd-luff-anvil-full-stack-web-apps-with-nothing-but-python.json index 64c4de6d3..7fb2c7eb1 100644 --- a/pycon-sk-2019/videos/meredydd-luff-anvil-full-stack-web-apps-with-nothing-but-python.json +++ b/pycon-sk-2019/videos/meredydd-luff-anvil-full-stack-web-apps-with-nothing-but-python.json @@ -2,7 +2,6 @@ "description": "Building for the modern web is complicated and error-prone: a typical\nweb app requires five different languages and four or five frameworks\njust to get started. Wouldn't it be nicer if we could do it all in\nPython? With Anvil, you can design your page visually, write your\nbrowser-side and server-side code in pure Python, and even set up a\nbuilt-in database in seconds. In this talk, Meredydd will walk us\nthrough how Anvil works under the bonnet, and the challenges of building\na programming system that's easy to use without sacrificing power.\n", "duration": 3061, "language": "eng", - "published_at": "2019-06-14T12:51:01.000Z", "recorded": "2019-03-22", "speakers": [ "Meredydd Luff" diff --git a/pycon-sk-2019/videos/miroslav-binas-vyucba-programovania-pomocou-tvorby-hier-v-pygame-zero.json b/pycon-sk-2019/videos/miroslav-binas-vyucba-programovania-pomocou-tvorby-hier-v-pygame-zero.json index b948206c4..06663f2da 100644 --- a/pycon-sk-2019/videos/miroslav-binas-vyucba-programovania-pomocou-tvorby-hier-v-pygame-zero.json +++ b/pycon-sk-2019/videos/miroslav-binas-vyucba-programovania-pomocou-tvorby-hier-v-pygame-zero.json @@ -1,7 +1,6 @@ { "description": "V\u00fdu\u010dba programovania pomocou tvorby hier v PyGame Zero", "duration": 1650, - "published_at": "2019-06-27T14:58:03.000Z", "recorded": "2019-03-22", "speakers": [ "Miroslav Bi\u0148as" diff --git a/pycon-sk-2019/videos/miroslav-sedivy-a-day-has-only-24-1-hours.json b/pycon-sk-2019/videos/miroslav-sedivy-a-day-has-only-24-1-hours.json index 6413a7928..8797e275b 100644 --- a/pycon-sk-2019/videos/miroslav-sedivy-a-day-has-only-24-1-hours.json +++ b/pycon-sk-2019/videos/miroslav-sedivy-a-day-has-only-24-1-hours.json @@ -2,7 +2,6 @@ "description": "Not only will the Sunday in the week after this PyCon.SK 2019 be one\nhour shorter, but it may also steal even more of your sleep because of all\nyour Python code that has to work smoothly during daylight saving time\nchanges or otherwise manipulate time zones. While working correctly with\ndatetime objects and offsets can be mastered, you have no control over the\ninformation on local time zones. 
Exclusive domain of geographers one\nhundred years ago, time zone adjustments became a toy in the hands\nof governments all around the world.\n\nAfter a short overview of Python's datetime and pytz libraries, you'll\nlearn how the information on time zone changes gets into your system.\nWe'll fly quickly around the world's over five hundred time zones, but\nwill also focus on the case of Slovakia and surrounding countries. Two\ncenturies of propaganda and chaos in thirty minutes. Maybe that will\nmake you want to avoid time zones in your code altogether!\n", "duration": 1738, "language": "eng", - "published_at": "2019-06-14T16:31:50.000Z", "recorded": "2019-03-22", "speakers": [ "Miroslav \u0160ediv\u00fd" diff --git a/pycon-sk-2019/videos/miroslava-sturmova-marek-visnovec-robert-junas-objavovanie-vpythonu-v-dudley-college.json b/pycon-sk-2019/videos/miroslava-sturmova-marek-visnovec-robert-junas-objavovanie-vpythonu-v-dudley-college.json index 4fd93f834..e074e9c81 100644 --- a/pycon-sk-2019/videos/miroslava-sturmova-marek-visnovec-robert-junas-objavovanie-vpythonu-v-dudley-college.json +++ b/pycon-sk-2019/videos/miroslava-sturmova-marek-visnovec-robert-junas-objavovanie-vpythonu-v-dudley-college.json @@ -1,7 +1,6 @@ { "description": "V marci 2017 dostali \u017eiaci Strednej priemyselnej \u0161koly technickej v\nMartine (SP\u0160T) prv\u00fdkr\u00e1t pr\u00edle\u017eitos\u0165 absolvova\u0165 prax v Dudley College vo\nVe\u013ekej Brit\u00e1nii. V\u010faka \u00faspe\u0161n\u00e9mu projektu Erasmus+ \u201eTuriec do Eur\u00f3py,\nEur\u00f3pa do Turca\u201c, ktor\u00fd pripravila K. A. B. A. Slovensko, sa tu\nzozn\u00e1mili s programovac\u00edm jazykom Python. Projekt bol \u00faspe\u0161n\u00fd aj v\n\u0161kolskom roku 2017/2018. Dudley College sa tak stala na dva marcov\u00e9\nt\u00fd\u017edne \u0161kolou pre desiatich tretiakov technick\u00e9ho l\u00fdcea SP\u0160T. Odborn\u00fd\nprogram bol zameran\u00fd na tvorbu efekt\u00edvnych algoritmov, rozvoj logick\u00e9ho\nmyslenia, grafiku a n\u00e1stroj pre vytv\u00e1ranie 3D anim\u00e1ci\u00ed v re\u00e1lnom \u010dase -\nVPython. Svoje nadobudnut\u00e9 vedomosti a praktick\u00e9 zru\u010dnosti odovzdali\n\u00fa\u010dastn\u00edci mobility aj \u010falej. U\u010ditelia informatiky SP\u0160T a Gymn\u00e1zia\nViliama Paul\u00ednyho T\u00f3tha (GVPT) v Martine nadviazali na \u00faspe\u0161n\u00fa\nspolupr\u00e1cu z minul\u00e9ho roka, op\u00e4\u0165 spojili svoje sily a pripravili druh\u00fd\nro\u010dn\u00edk odbornej konferencie PYTHON DAYS 2.0. V d\u0148och 31.5 - 1.6.2018\nodzneli na GVPT zauj\u00edmav\u00e9 predn\u00e1\u0161ky, robilo sa ve\u013ea workshopov. Na\njednom z nich sme predstavili gymnazistom VPython a v\u00fdvojov\u00e9 prostredie\nGlowScript. V\u00fdstupy z mobility Erasmus+ tak neostali len doma, na SP\u0160T,\nale nadobudli ove\u013ea \u0161ir\u0161\u00ed rozmer. 
Na konferencii PyCON SK 2019\npredstav\u00edme toto prostredie, jeho n\u00e1stroje a uk\u00e1\u017eeme pr\u00e1ce, ktor\u00e9 sme\nvytvorili.\n", "duration": 1637, - "published_at": "2019-06-27T15:02:57.000Z", "recorded": "2019-03-22", "speakers": [ "Miroslava \u0160turmov\u00e1", diff --git a/pycon-sk-2019/videos/mislav-cimpersak-on-the-edge-of-leadership.json b/pycon-sk-2019/videos/mislav-cimpersak-on-the-edge-of-leadership.json index ccb6eeffd..49af49060 100644 --- a/pycon-sk-2019/videos/mislav-cimpersak-on-the-edge-of-leadership.json +++ b/pycon-sk-2019/videos/mislav-cimpersak-on-the-edge-of-leadership.json @@ -2,7 +2,6 @@ "description": "How to lead a dev team with zero personal experience in people\nmanagement while still having no idea how to manage even your own time.\nAfter several years of a fairly regular software developer career path,\nI've found myself in a new role that I've unconsciously put myself into.\nBeing a solid senior developer, management at the time agreed that I was\na good candidate for the team lead. Being promoted to a new personal\nlevel of incompetence, I felt lost and confused in my new role, but made\na pact with myself to see this one through to the end. And so it came\nthat for the past few years, I've been the development team lead for several\nteams.\n\nWith zero formal training, relying on my intuition and various random\npieces of advice I picked up from people smarter than me, I've more or\nless successfully led my teams to their goals. On that path, I've\nlearned tremendously, improved myself and hopefully, my teams. I would\nlike to share what worked and what didn't, my emotions, my regrets, and\nmy wins.\n", "duration": 1898, "language": "eng", - "published_at": "2019-08-06T12:05:18.000Z", "recorded": "2019-03-23", "speakers": [ "Mislav Cimper\u0161ak" diff --git a/pycon-sk-2019/videos/pawel-lewtak-long-term-it-projects.json b/pycon-sk-2019/videos/pawel-lewtak-long-term-it-projects.json index ad08ef7fd..75ad1eaff 100644 --- a/pycon-sk-2019/videos/pawel-lewtak-long-term-it-projects.json +++ b/pycon-sk-2019/videos/pawel-lewtak-long-term-it-projects.json @@ -2,7 +2,6 @@ "description": "Not everybody has a chance to work on a new project from scratch.\nEverybody has some ideas about what to do on such a project so it'll be a\ngood one instead of legacy in the near future. During this talk I want to\nshare my experience and show what works for me. I'll start with project\nanalysis and end with deployment and beyond. I want to discuss some common\nmistakes and misconceptions and hopefully give you some ideas you could\nuse for your own project regardless of its size. With a bit of work\nsome/all could be applied to legacy projects as well.\n", "duration": 2503, "language": "eng", - "published_at": "2019-08-06T12:08:49.000Z", "recorded": "2019-03-23", "speakers": [ "Pawe\u0142 Lewtak" diff --git a/pycon-sk-2019/videos/peter-kucera-programujeme-v-pythone-na-strednej-skole.json b/pycon-sk-2019/videos/peter-kucera-programujeme-v-pythone-na-strednej-skole.json index c0a56267a..1296726e8 100644 --- a/pycon-sk-2019/videos/peter-kucera-programujeme-v-pythone-na-strednej-skole.json +++ b/pycon-sk-2019/videos/peter-kucera-programujeme-v-pythone-na-strednej-skole.json @@ -1,7 +1,6 @@ { "description": "Rozm\u00fd\u0161\u013eate, \u010do vyu\u010dova\u0165 na semin\u00e1ri z programovania, ak chcete svojich\n\u0161tudentov nau\u010di\u0165 e\u0161te viac? 
Z\u00e1kladn\u00fa a maturitn\u00fa \u00farove\u0148 programovania,\nktor\u00fa sme predstavili v minulosti, sme roz\u0161\u00edrili o \u010fal\u0161ie zauj\u00edmav\u00e9\nt\u00e9my. V pr\u00edspevku v\u00e1m predstav\u00edme, \u010do u\u010d\u00edme na semin\u00e1roch z\nprogramovania. Uk\u00e1\u017eeme v\u00e1m, ktor\u00fdm t\u00e9mam sa venujeme, ako sme ich\nspracovali, ktor\u00fd kontext \u00faloh sa n\u00e1m osved\u010dil v praxi. Tie\u017e v\u00e1m\npredstav\u00edme kompletn\u00e9 materi\u00e1ly na stredn\u00fa \u0161kolu. Ako aktivizova\u0165\n\u0161tudentov? Ak\u00e9 programy vedia vytvori\u0165 a ako tvoria semin\u00e1rne pr\u00e1ce z\nprogramovania? Aj t\u00fdmto ot\u00e1zkam sa budeme venova\u0165 v na\u0161om pr\u00edspevku.\n", "duration": 1956, - "published_at": "2019-06-27T15:00:01.000Z", "recorded": "2019-03-22", "speakers": [ "Peter Ku\u010dera" diff --git a/pycon-sk-2019/videos/peter-valachovic-prechod-na-python-u-piaristov-v-nitre-a-niektore-tamojsie-studentske-projekty.json b/pycon-sk-2019/videos/peter-valachovic-prechod-na-python-u-piaristov-v-nitre-a-niektore-tamojsie-studentske-projekty.json index b66463726..206778711 100644 --- a/pycon-sk-2019/videos/peter-valachovic-prechod-na-python-u-piaristov-v-nitre-a-niektore-tamojsie-studentske-projekty.json +++ b/pycon-sk-2019/videos/peter-valachovic-prechod-na-python-u-piaristov-v-nitre-a-niektore-tamojsie-studentske-projekty.json @@ -1,7 +1,6 @@ { "description": "Kr\u00e1tko sa podel\u00edme o tom ako sa vyv\u00edja prechod na Python z pascalu\n(Delphi/Lazarus), \u010do sa n\u00e1m podarilo, a ak\u00e9 v\u00fdzvy pred nami sme\nidentifikovali konkr\u00e9tne by sme chceli hovori\u0165 o tom ako postupujeme pri\nv\u00fdvoji kurzu pripravuj\u00faceho na maturitu z informatiky kedy pou\u017eijeme\nprogramovac\u00ed jazyk python vo verzii 3.X. Predstav\u00edme projekt \u0161tudentskej\nu\u010debnice pre z\u00e1kladn\u00fd kurz Pythonu - od \u0161tudenta pre \u0161tudentov a\n\u0161tudentsk\u00fd projekt vylep\u0161enia zvonen\u00ed na \u0161kole.\n\nPri predstavovan\u00ed \u0161tudentskej u\u010debnice bude Tom\u00e1\u0161 Kiss hovori\u0165 o\nmotiv\u00e1cii pre t\u00fato u\u010debnicu a sp\u00f4sobe ak\u00fdm ju realizuje. Tie\u017e by chcel\npredostrie\u0165 v\u00fdzvy, ktor\u00e9 identifikuje a tie\u017e sp\u00f4sob ako bud\u00fa m\u00f4c\u0165\nostatn\u00ed u\u010debnicu z\u00edska\u0165.\n\nPri projekte o vylep\u0161en\u00ed zvonen\u00ed na \u0161kole op\u00ed\u0161e Jakub stav, ak\u00fd bol na\n\u0161kole predt\u00fdm a pop\u00ed\u0161e motiv\u00e1ciu k zlep\u0161eniu ako aj realizovan\u00fd syst\u00e9m.\nChcel by tie\u017e pouk\u00e1za\u0165 na mo\u017enosti a \u00faskalia, ktor\u00e9 sa pri v\u00fdvoji\nzvonen\u00ed vyskytli.\n", "duration": 1359, - "published_at": "2019-06-30T19:58:03.000Z", "recorded": "2019-03-22", "speakers": [ "Peter Valachovi\u010d" diff --git a/pycon-sk-2019/videos/petr-stehlik-the-dos-and-donts-of-task-queues.json b/pycon-sk-2019/videos/petr-stehlik-the-dos-and-donts-of-task-queues.json index aa781a57a..1ba84d2d3 100644 --- a/pycon-sk-2019/videos/petr-stehlik-the-dos-and-donts-of-task-queues.json +++ b/pycon-sk-2019/videos/petr-stehlik-the-dos-and-donts-of-task-queues.json @@ -2,7 +2,6 @@ "description": "Let's talk about our experience with Celery and intentions to switch to\nRQ which failed. 
We'll show our setup of larger and more complex apps\nthan the ones presented in doc examples together with some useful tips\nand tricks and how-tos on orchestrating such apps for thousands of tasks\na day.\n\nAt Kiwi.com we heavily rely on task queues and asynchronous execution of\ncode to process large amounts of requests coming to our back-ends. With\nthe separation of our codebase to microservices, we can quickly try new\ntools and different approaches to process these large volumes of\nrequests. The microservice we'll be talking about is making unreliable\nslow 3rd party services reliable and asynchronous with a bit of business\nlogic sprinkled on top of it. We\u2019ll tell a failure story of ours but\nresulting in a valuable lesson.\n\nMost of our services use Celery and it's the go-to tool for new services\nas well but we wanted to be different with this new microservice. RQ is\nthe next best choice for task queues and it is presented as simpler and\nmore straightforward than Celery. That can definitely be true but after\n3 weeks of research, development and struggling we found out the\nunpleasant truth about being simple and making the right choices. We\nwon't talk about comparing the frameworks but rather about the approach\non how to experiment with new things in your environment. After that,\nwe'll present our current setup which can take upon any number of tasks.\nHow we orchestrate the app and continuously integrate and deploy and\nwhat fun things await ahead of us in the development.\n", "duration": 2680, "language": "eng", - "published_at": "2019-08-06T12:19:31.000Z", "recorded": "2019-03-24", "speakers": [ "Petr Stehl\u00edk" diff --git a/pycon-sk-2019/videos/petra-dzurovcinova-inovacie-v-meste-bratislava.json b/pycon-sk-2019/videos/petra-dzurovcinova-inovacie-v-meste-bratislava.json index 5f4fc5f50..346277566 100644 --- a/pycon-sk-2019/videos/petra-dzurovcinova-inovacie-v-meste-bratislava.json +++ b/pycon-sk-2019/videos/petra-dzurovcinova-inovacie-v-meste-bratislava.json @@ -1,7 +1,6 @@ { "description": "\u010co v\u0161etko rob\u00edme v Bratislave pre inov\u00e1cie a ako m\u00f4\u017eete by\u0165 s\u00fa\u010das\u0165ou\ntohto procesu?\n", "duration": 1425, - "published_at": "2019-07-01T22:36:59.000Z", "recorded": "2019-03-22", "speakers": [ "Petra Dzurovcinova" diff --git a/pycon-sk-2019/videos/pycon-sk-2019-aftermovie.json b/pycon-sk-2019/videos/pycon-sk-2019-aftermovie.json index d58ebfd29..7ad2b9755 100644 --- a/pycon-sk-2019/videos/pycon-sk-2019-aftermovie.json +++ b/pycon-sk-2019/videos/pycon-sk-2019-aftermovie.json @@ -2,7 +2,6 @@ "description": "PyCon SK 2019 in 2 minutes...", "duration": 138, "language": "eng", - "published_at": "2019-06-27T13:22:59.000Z", "recorded": "2019-03-22", "speakers": [ "Various speakers" diff --git a/pycon-sk-2019/videos/pycon-sk-2019-edutalks.json b/pycon-sk-2019/videos/pycon-sk-2019-edutalks.json index 3a17baaa1..58241953f 100644 --- a/pycon-sk-2019/videos/pycon-sk-2019-edutalks.json +++ b/pycon-sk-2019/videos/pycon-sk-2019-edutalks.json @@ -1,7 +1,6 @@ { "description": "PyCon SK 2019 - Edutalks", "duration": 1546, - "published_at": "2019-06-30T21:07:09.000Z", "recorded": "2019-03-22", "speakers": [ "Various speakers" diff --git a/pycon-sk-2019/videos/radoslav-kokula-robot-framework-univerzalny-nastroj-pre-automatizovane-testovanie.json b/pycon-sk-2019/videos/radoslav-kokula-robot-framework-univerzalny-nastroj-pre-automatizovane-testovanie.json index 90d671709..1b1b968d5 100644 --- 
a/pycon-sk-2019/videos/radoslav-kokula-robot-framework-univerzalny-nastroj-pre-automatizovane-testovanie.json +++ b/pycon-sk-2019/videos/radoslav-kokula-robot-framework-univerzalny-nastroj-pre-automatizovane-testovanie.json @@ -1,7 +1,6 @@ { "description": "Robot Framework je open-source framework naprogramovan\u00fd v jazyku Python\nur\u010den\u00fd pre automatiz\u00e1ciu testov a podporu test-driven developmentu.\nKompatibilita s r\u00f4znymi opera\u010dn\u00fdmi syst\u00e9mami, jednoduch\u00e1 in\u0161tal\u00e1cia,\nzrozumite\u013en\u00e1 syntax vhodn\u00e1 aj pre \u013eud\u00ed bez program\u00e1torsk\u00fdch zru\u010dnost\u00ed, s\nmo\u017enos\u0165ou importova\u0165 vlastn\u00e9 Python kni\u017enice a moduly, a tie\u017e \u013eahkou\nintegr\u00e1ciou do CI/CD serverov (napr. Jenkins) s\u00fa jeho nesporn\u00e9 v\u00fdhody.\nV\u010faka mno\u017estvu dostupn\u00fdch kni\u017en\u00edc je mo\u017en\u00e9 vyu\u017ei\u0165 Robot Framework na\ntestovanie r\u00f4znych druhov aplik\u00e1ci\u00ed a zariaden\u00ed s vyu\u017eit\u00edm \u0161irokej \u0161k\u00e1ly\nrozhran\u00ed, protokolov a d\u00e1tov\u00fdch typov. Poskytuje taktie\u017e mo\u017enosti\nparaleln\u00e9ho vykon\u00e1vania testov alebo sp\u00fa\u0161\u0165anie testov na vzdialen\u00fdch\nsyst\u00e9moch. Neoddelite\u013enou s\u00fa\u010das\u0165ou je aj automaticky generovan\u00fd report\nvo form\u00e1toch html a xml, poskytuj\u00faci preh\u013eadn\u00e9 inform\u00e1cie o trvan\u00ed,\npou\u017eit\u00fdch d\u00e1tach a v\u00fdsledku vykon\u00e1van\u00fdch testov a aj jednotliv\u00fdch\nkrokoch.\n\nPr\u00edspevok predstav\u00ed tento n\u00e1stroj na konkr\u00e9tnych pr\u00edkladoch testov vo\nform\u00e1toch pre keyword-driven, data-driven a behaviour-driven testing,\nako aj pr\u00e1cu so Seleniom a REST API rozhran\u00edm.\n", "duration": 1896, - "published_at": "2019-08-06T11:49:15.000Z", "recorded": "2019-03-23", "speakers": [ "Radoslav Koku\u013ea" diff --git a/pycon-sk-2019/videos/saumuel-hopko-amazon-web-services-alexa-skills.json b/pycon-sk-2019/videos/saumuel-hopko-amazon-web-services-alexa-skills.json index 7fd871db3..04d6d6f9d 100644 --- a/pycon-sk-2019/videos/saumuel-hopko-amazon-web-services-alexa-skills.json +++ b/pycon-sk-2019/videos/saumuel-hopko-amazon-web-services-alexa-skills.json @@ -1,7 +1,6 @@ { "description": "Alexa is Amazon\u2019s cloud-based voice service that provides a natural voice\nexperience to end users. This opens a way to use the human voice as\ninput/output for your applications. You can easily create\nvoice-controlled applications (called Alexa skills) or just a voice\ninterface for your current products or anything you like! Tools provided\nby Amazon web services (AWS) are easy to use and, if used correctly,\ncompletely free. Once the skill is done, it\u2019s just a matter of minutes to\ndeploy it as a highly available, auto-scaled, serverless, globally\ndistributed application using AWS. Then you can use the Amazon Alexa Skill\nstore to sell your product all over the world. 
I would like to show you\nhow it works, what AWS provides to developers and how anybody can create\nAlexa skills using your favorite language \u2013 Python.\n", "duration": 2672, - "published_at": "2019-08-06T11:46:36.000Z", "recorded": "2019-03-23", "speakers": [ "Samuel Hopko" diff --git a/pycon-sk-2019/videos/stephane-wirtel-whats-new-in-python-37.json b/pycon-sk-2019/videos/stephane-wirtel-whats-new-in-python-37.json index 6e801ca69..b5b42abe1 100644 --- a/pycon-sk-2019/videos/stephane-wirtel-whats-new-in-python-37.json +++ b/pycon-sk-2019/videos/stephane-wirtel-whats-new-in-python-37.json @@ -2,7 +2,6 @@ "description": "Scheduled for release in mid-June before the conference, Python 3.7 is\nshaping up to be a feature-packed release! This talk will cover all the\nnew features of Python 3.7, including Data Classes and Context\nVariables for asynchronous programming with asyncio.\n", "duration": 2122, "language": "eng", - "published_at": "2019-07-01T22:31:59.000Z", "recorded": "2019-03-23", "speakers": [ "St\u00e9phane Wirtel" diff --git a/pycon-sk-2019/videos/suryansh-tibarewala-mvp-is-never-just-a-mvp.json b/pycon-sk-2019/videos/suryansh-tibarewala-mvp-is-never-just-a-mvp.json index a079eec0d..b9d897695 100644 --- a/pycon-sk-2019/videos/suryansh-tibarewala-mvp-is-never-just-a-mvp.json +++ b/pycon-sk-2019/videos/suryansh-tibarewala-mvp-is-never-just-a-mvp.json @@ -2,7 +2,6 @@ "description": "MVP: Minimum Viable Product. We all follow the agile product development\nprocess, iterate fast, fail fast and in the disguise of an MVP we cut\ncorners to release a product to the market ASAP. In this process, there\nare several compromises we make in the tech setup and the codebase to\njust get it out there. These compromises more often than not bite\nus back.\n\nAnd this is what the talk is about. The assumptions which never hold,\nthe implementation which is never revisited, and the MVP which never\nends up being just an MVP. By the end of the talk, you will know the\ncautionary tale of the most common mistakes everyone makes while\ndeveloping a new product and some measures on how you can avoid them.\n", "duration": 1416, "language": "eng", - "published_at": "2019-08-06T12:14:16.000Z", "recorded": "2019-03-24", "speakers": [ "Suryansh Tibarewala" diff --git a/pycon-sk-2019/videos/svetlana-hrabinova-nenutte-uzivatele-premyslet.json b/pycon-sk-2019/videos/svetlana-hrabinova-nenutte-uzivatele-premyslet.json index de6d42e68..16e39f150 100644 --- a/pycon-sk-2019/videos/svetlana-hrabinova-nenutte-uzivatele-premyslet.json +++ b/pycon-sk-2019/videos/svetlana-hrabinova-nenutte-uzivatele-premyslet.json @@ -1,7 +1,6 @@ { "description": "My\u0161lenka Steva Kruga \"Nenu\u0165te u\u017eivatele p\u0159em\u00fd\u0161let\" bude hlavn\u00ed inspirac\u00ed\npro p\u0159\u00edsp\u011bvek, kter\u00fd se bude v\u011bnovat pojm\u016fm jako je UX (user\nexperience), UI (user interface) a pou\u017eitelnost. 
Smyslem je pouk\u00e1zat na\nz\u00e1kladn\u00ed principy pou\u017eitelnosti a n\u00e1vrhu u\u017eivatelsk\u00e9ho pro\u017eitku, kter\u00e9\npomohou navrhovat a vytv\u00e1\u0159et intuitivn\u00ed a pochopiteln\u00e9 slu\u017eby \u010di\nu\u017eivatelsk\u00e1 rozhran\u00ed s c\u00edlem j\u00edt sv\u00fdm u\u017eivatel\u016fm naproti, tedy aby\nu\u017eivatel\u00e9 museli d\u011blat pouze nezbytn\u00e9 kroky s vynalo\u017een\u00edm co nejmen\u0161\u00edho\n\u00fasil\u00ed.\n", "duration": 1758, - "published_at": "2019-08-06T11:44:28.000Z", "recorded": "2019-03-23", "speakers": [ "Sv\u011btlana Hrabinov\u00e1" diff --git a/pycon-sk-2019/videos/sviatoslav-sydorenko-github-bots-rise-of-the-machines.json b/pycon-sk-2019/videos/sviatoslav-sydorenko-github-bots-rise-of-the-machines.json index a9c369a3d..84a5949ef 100644 --- a/pycon-sk-2019/videos/sviatoslav-sydorenko-github-bots-rise-of-the-machines.json +++ b/pycon-sk-2019/videos/sviatoslav-sydorenko-github-bots-rise-of-the-machines.json @@ -2,7 +2,6 @@ "description": "Hello, fellow human!\n\nYou probably spend too much time regretting that manual recurring\nroutines don't do themselves. Let's change this! This workshop will\nempower you to automate all the things using the GitHub Apps integration\ntype as reusable, restless workers helping you overcome typical\nmaintainer frustrations. We'll have a walkthrough of the complete\napplication creation and deployment process: from creating an App entity\nin the GitHub UI, to actual coding and to shipping your code via Heroku.\n\nTo participate in the hands-on, bring your laptop with Python 3.7 and\nsome editor on-board. You'll also need to have a GitHub account.\n\nBonus: `sign up for GitHub\nActions `__ as early as\npossible to try it out at the workshop.\n\nAgenda:\n\n\n", "duration": 2045, "language": "eng", - "published_at": "2019-06-14T14:45:24.000Z", "recorded": "2019-03-22", "speakers": [ "Sviatoslav Sydorenko" diff --git a/pycon-sk-2019/videos/tibor-frank-automated-visualization-and-presentation-of-tests-results.json b/pycon-sk-2019/videos/tibor-frank-automated-visualization-and-presentation-of-tests-results.json index 141a9d571..46b1cbab8 100644 --- a/pycon-sk-2019/videos/tibor-frank-automated-visualization-and-presentation-of-tests-results.json +++ b/pycon-sk-2019/videos/tibor-frank-automated-visualization-and-presentation-of-tests-results.json @@ -2,7 +2,6 @@ "description": "Thousands of performance tests run by tens of Jenkins jobs generate\ngigabytes of data. It is not an easy task to provide it to our audience.\nBut we do our best. Data visualization is not only coding in Python; it\nis above all finding the best way to transform well-structured but\nboring XML files into fancy and informative graphs and tables. 
They must\nbe beautiful, gratifying, logical and functional like any good design\nand/or architecture.\n", "duration": 1761, "language": "eng", - "published_at": "2019-08-06T12:15:18.000Z", "recorded": "2019-03-24", "speakers": [ "Tibor Frank" diff --git a/pycon-sk-2019/videos/tomas-dudik-ako-sa-dalej-pasujeme-s-python-na-hodinach-informatiky.json b/pycon-sk-2019/videos/tomas-dudik-ako-sa-dalej-pasujeme-s-python-na-hodinach-informatiky.json index 39b6f2cbd..9712b22c9 100644 --- a/pycon-sk-2019/videos/tomas-dudik-ako-sa-dalej-pasujeme-s-python-na-hodinach-informatiky.json +++ b/pycon-sk-2019/videos/tomas-dudik-ako-sa-dalej-pasujeme-s-python-na-hodinach-informatiky.json @@ -1,7 +1,6 @@ { "description": "Ako sa \u010falej pasujeme s Python na hodin\u00e1ch informatiky", "duration": 668, - "published_at": "2019-06-30T20:55:18.000Z", "recorded": "2019-03-22", "speakers": [ "Tom\u00e1\u0161 Dud\u00edk" diff --git a/pydata-delhi-2019/videos/auria-kathi-the-power-of-multi-model-machine-learning-pipelines-by-sleeba-paul.json b/pydata-delhi-2019/videos/auria-kathi-the-power-of-multi-model-machine-learning-pipelines-by-sleeba-paul.json index 468d4c9cc..89d44f7c2 100644 --- a/pydata-delhi-2019/videos/auria-kathi-the-power-of-multi-model-machine-learning-pipelines-by-sleeba-paul.json +++ b/pydata-delhi-2019/videos/auria-kathi-the-power-of-multi-model-machine-learning-pipelines-by-sleeba-paul.json @@ -2,7 +2,6 @@ "description": "Considering the current state-of-the-art deep learning algorithms, we might\nnot be able to come up with a single algorithm or network which can\nbuild an advanced creative application. But the components of the\napplication can be emulated using individual state-of-the-art algorithms.\nThis is called a Multi-model Pipeline architecture for Auria.\n\nBasic outline of the talk\n~~~~~~~~~~~~~~~~~~~~~~~~~\n\n1. Basic Machine Learning Pipelines [3-5mins]\n2. Using multiple models in a Machine Learning Pipeline [5-7mins]\n\n - Why multiple models?\n - How is it different from single-model pipelines?\n - What are the challenges?\n\n3. Case study - Auria Kathi, the first poet artist [10-15mins]\n\n - Auria Kathi introduction\n - Engineering Pipeline of Auria Kathi\n - Azure Machine Learning Pipelines\n\n4. Conclusion and Q&A session [2mins]\n\nAuria Kathi - AI Poet Artist\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nAuria Kathi is an artificial artist and poet completely living online.\n\"Auria Kathi\" is an anagram for \"AI Haiku Art\". Auria generates a short\npoem, draws abstract art based on the poem, and then colors the\npicture depending upon a mood. All these creative tasks are achieved\nusing a multi-model ML pipeline.\n\nAuria's work is available on both Instagram and Twitter, and she will be\nposting daily for the next year.\n\n- Instagram Handle: https://www.instagram.com/auriakathi/\n- Twitter Handle: https://twitter.com/AuriaKathi\n\nThe engineering pipeline of Auria\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n1. An LSTM-based language model, trained on 3.5 lakh Haikus scraped\n from Reddit. The model is used to generate artificial poetry.\n2. A text-to-image network, called AttnGAN from Microsoft Research,\n which converts the generated Haiku to an abstract image.\n3. A photorealistic style transfer algorithm which selects a random\n style image from the WikiArt dataset, and transfers color and brush\n strokes to the generated image. 
The WikiArt dataset is a collection\n of 4k+ curated artworks, which are aggregated on the basis of\n emotions induced in human beings when the artwork is shown to them.\n\n.. figure:: https://sleebapaul.github.io/assets/auria_aml/auria_pipeline.png\n :alt: Engineering Pipeline of Auria\n\n Engineering Pipeline of Auria\n\nAuria on news and publications\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n1. Creative Applications Network -\n https://www.creativeapplications.net/member-submissions/auria-kathi-an-ai-artist-living-in-the-cloud/\n2. Coding Blues -\n https://codingblues.com/2019/01/11/fabin-sleeba-and-wonderful-auria/\n3. Creative AI Newsletter -\n https://us15.campaign-archive.com/?u=c7e080421931e2a646364e3ef&id=d1a15e8502\n4. Towards Datascience -\n https://towardsdatascience.com/auriakathi-596dfb8710d6\n5. Towards Datascience -\n https://towardsdatascience.com/auria-kathi-powered-by-microsoft-azure-machine-learning-pipelines-385de55de062\n\nFlorence Biennale 2019\n~~~~~~~~~~~~~~~~~~~~~~\n\nAt the 12th edition of the Florence Biennale, happening in October 2019, Auria\nis exhibiting her work under the contemporary digital art section. Since\nthe Biennale is an international platform for art, the presence of Auria's\nAI-produced work will be discussed there with greater importance.\nFurthermore, how creative machines are going to build our future by\ninspiring artists to come up with novel ideas is also a crucial part of\nthe discussion.\n\nCollaboration with Microsoft\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nAuria is a perfect use case of the Microsoft-envisioned Azure Machine\nLearning Pipelines, where each step can be conceived as a containerized\ncomputation step. Multiple models developed in diverse environments can\nbe incorporated into the reproducible pipelines, which can be easily\ndeployed as an API. Collaborating with Microsoft, Auria's creative\npursuit is coming to a wider audience.\n", "duration": 1288, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "Sleeba Paul" diff --git a/pydata-delhi-2019/videos/automated-large-scale-forecasting-for-1000-products-by-aanish-singla-nikita-sharma.json b/pydata-delhi-2019/videos/automated-large-scale-forecasting-for-1000-products-by-aanish-singla-nikita-sharma.json index 7a600b76d..b0c4b12d7 100644 --- a/pydata-delhi-2019/videos/automated-large-scale-forecasting-for-1000-products-by-aanish-singla-nikita-sharma.json +++ b/pydata-delhi-2019/videos/automated-large-scale-forecasting-for-1000-products-by-aanish-singla-nikita-sharma.json @@ -2,7 +2,6 @@ "description": "Forecasting is very critical to businesses. It helps them plan for the\nfuture and gives them an opportunity to be prepared for upcoming demand.\nIt is often required in practice to estimate demand for thousands of\nproducts, e.g. SKUs in retail stores. It's not humanly possible to create\nthis many models. 
In this session, we show how we did this for a large\nretailer.\n\nOutline of Talk\n~~~~~~~~~~~~~~~\n\n- Importance of Forecasting\n- Overview of Time series Forecasting Techniques\n\n - ARIMA\n - Exponential Smoothing\n - XGBoost\n\n- Generating Prediction Intervals for the Forecast\n- Our approach for forecasting a large number of products\n\n - Problem Statement\n - Solution Methodology\n - Code Walk through\n\n- Questions and Answers\n", "duration": 1237, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-04", "speakers": [ "Aanish Singla", diff --git a/pydata-delhi-2019/videos/automating-data-pipeline-using-apache-airflow-by-mridu-bhatnagar.json b/pydata-delhi-2019/videos/automating-data-pipeline-using-apache-airflow-by-mridu-bhatnagar.json index de2cae479..cf541f2ab 100644 --- a/pydata-delhi-2019/videos/automating-data-pipeline-using-apache-airflow-by-mridu-bhatnagar.json +++ b/pydata-delhi-2019/videos/automating-data-pipeline-using-apache-airflow-by-mridu-bhatnagar.json @@ -2,7 +2,6 @@ "description": "Manually running scripts to extract, transform and load data is a\ntrade-off with time; it is tedious and cumbersome. The process of building\na data pipeline can be automated. Scripts to extract data can be\nscheduled using crontab. However, using crontab has its own drawbacks.\nOne major challenge is monitoring. Airflow is a platform to\nprogrammatically author, schedule and monitor workflows.\n\nToday, we are moving towards machine learning: making predictions and\nfinding insights based on data. For the same purpose, the initial\nstep is to have efficient processes in place which help us in collecting\ndata from various different data sources. Using traditional ways to\ncollect data is tedious and cumbersome. Manually running scripts to\nextract, transform and load data is a trade-off with time.\n\nTo make the process efficient, the data pipeline can be automated.\nScripts to extract data can be auto-scheduled using crontab. However,\nusing crontab has its own drawbacks. One major challenge comes in\nmonitoring. This is where Apache Airflow, an open-source tool built by the\nAirBnB engineering team, helps. Airflow is a platform to\nprogrammatically author, schedule and monitor workflows.\n\nThe talk aims at introducing the attendees to:\n\n1. Airflow - overview of the tool. Advantages, disadvantages\n2. Directed acyclic graph - Examples of directed acyclic graphs and\n directed cyclic graphs\n3. Operators - a. Bash Operator b. Python Operator c. Email Operator\n4. Python context manager\n5. Examples\n6. Demo\n", "duration": 1544, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "Mridu Bhatnagar" diff --git a/pydata-delhi-2019/videos/closing-notes-pydata-delhi-2019.json b/pydata-delhi-2019/videos/closing-notes-pydata-delhi-2019.json index 856ab2889..90c062fbb 100644 --- a/pydata-delhi-2019/videos/closing-notes-pydata-delhi-2019.json +++ b/pydata-delhi-2019/videos/closing-notes-pydata-delhi-2019.json @@ -2,7 +2,6 @@ "description": "#PyDataDelhi19 is a wrap. Huge thanks to all the attendees, speakers, sponsors, reviewers and awesome volunteers. See you in 2020.\n\nAfter Video - https://youtu.be/P1PeTLfnEYo\nAttendees Experience - https://youtu.be/yldx0gFnHLY\n\nhttps://delhi.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases. #PyDataDelhi2019 #PyData #DataScience", "duration": 495, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-04", "speakers": [ "Various speakers" diff --git a/pydata-delhi-2019/videos/deep-sequence-models-for-attribute-extraction-from-product-titles-by-deepak-sharma.json b/pydata-delhi-2019/videos/deep-sequence-models-for-attribute-extraction-from-product-titles-by-deepak-sharma.json index b5ac57a85..8851b5aed 100644 --- a/pydata-delhi-2019/videos/deep-sequence-models-for-attribute-extraction-from-product-titles-by-deepak-sharma.json +++ b/pydata-delhi-2019/videos/deep-sequence-models-for-attribute-extraction-from-product-titles-by-deepak-sharma.json @@ -2,7 +2,6 @@ "description": "We are working on extracting attributes (brand, shape, color, etc.) from\nraw product descriptions. The text is short and noisy and highly\ncontextual, and the labeling of attributes for training ML models is\ncostly. I discuss how we build a deep sequence CNN-BiLSTM-CRF model in\nPytorch to extract attributes from noisy text with minimum labeling\nusing an active learning approach.\n\nAt Clustr, I am working on converting raw product data available with\nSMEs to a structured catalog. One of the key tasks of building a catalogue\nis extracting attributes from raw product titles. Typical attributes\ninclude Brands, Color, Shape, measurement etc. Product titles are\nusually very short text describing the product without any significant\ngrammar. The titles are very dependent on the user, which generates very\nnoisy text with abbreviations, spelling mistakes, omitted text,\nimproper spaces, transliteration etc. The additional challenge is the\navailability of labeled data to train a machine learning model on this.\nThe product data we receive is not labeled and labeling is a costly\nexercise. I show how we built a deep sequence model with CNN, BiLSTM\nand CRF architecture and tuned it using active learning methods for this\ntask.\n\nI will discuss various deep sequence models combined with conditional\nrandom fields to label attributes from such text and outline pros and\ncons of different architectures. The model uses pretrained word\nembeddings. I will outline some of the challenges of sparse tokens and\nnoise while building our domain-specific word embeddings. A key aspect\nof the problem is the lack of labeled data and the high cost of getting this\ndata. To minimize the cost of labeling, I trained our model using an\nactive learning approach. The active labeling requires sampling\nstrategies such that minimum labeling can have maximum improvement in\nmodel performance. I implemented both model confidence based sampling\nand data coverage based sampling such that we are able to label examples\nwhich the model is least confident about and which are very different\nfrom the existing training examples. 
The active learning examples in a\nsingle training iteration were only about 1000 examples. Training models\nwith so few examples required us to be very careful about overfitting\nin training. I will also talk about how I regularized the model.\n\nTo rapidly iterate in experiments, I created an experimental setup which\nallowed rapid changes and traceability. It was challenging to measure\nthe performance of the model and understand the limitations of the\nmodel. To do this more effectively, I tracked various metrics to measure\nthe performance of the model, including various metrics relevant to\nNamed Entity tasks. These metrics were very informative in identifying\nthe gaps in the model. I will discuss these in detail.\n\nOverall, this talk will provide the audience a good in-depth understanding of\nhow deep sequence models are built in Pytorch for challenging\ninformation extraction tasks. They will understand the pros and cons of\ndifferent architectures, things to keep in mind while tuning such deep\nmodels and how active learning is performed in deep sequence models.\n", "duration": 1565, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-04", "speakers": [ "Deepak Sharma" diff --git a/pydata-delhi-2019/videos/deepening-democracy-through-data-learnings-from-indian-politics-and-policy-by-roshan-shankar.json b/pydata-delhi-2019/videos/deepening-democracy-through-data-learnings-from-indian-politics-and-policy-by-roshan-shankar.json index 3b6e7ddb5..cbb207525 100644 --- a/pydata-delhi-2019/videos/deepening-democracy-through-data-learnings-from-indian-politics-and-policy-by-roshan-shankar.json +++ b/pydata-delhi-2019/videos/deepening-democracy-through-data-learnings-from-indian-politics-and-policy-by-roshan-shankar.json @@ -2,7 +2,6 @@ "description": "Deepening Democracy through Data: Learnings from Indian Politics and Policy by Roshan Shankar @ PyData Delhi 2019\n\nhttps://delhi.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases. 
#PyDataDelhi2019 #PyData #DataScience", "duration": 1126, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "Roshan Shankar" diff --git a/pydata-delhi-2019/videos/differentiable-programming-an-exciting-generalisation-of-deep-neural-networks-by-dr-viral-b-shah.json b/pydata-delhi-2019/videos/differentiable-programming-an-exciting-generalisation-of-deep-neural-networks-by-dr-viral-b-shah.json index 0fb9305e0..def4f791f 100644 --- a/pydata-delhi-2019/videos/differentiable-programming-an-exciting-generalisation-of-deep-neural-networks-by-dr-viral-b-shah.json +++ b/pydata-delhi-2019/videos/differentiable-programming-an-exciting-generalisation-of-deep-neural-networks-by-dr-viral-b-shah.json @@ -2,7 +2,6 @@ "description": "PyData Delhi 2019 Keynote on Differentiable Programming - An exciting generalization of deep neural networks by Dr Viral B. Shah.\n\nhttps://delhi.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases. #PyDataDelhi2019 #PyData #DataScience", "duration": 3309, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-04", "speakers": [ "Dr Viral B. Shah" diff --git a/pydata-delhi-2019/videos/enhancing-credit-decision-making-for-new-applications-using-ml-and-python-by-rachita-das.json b/pydata-delhi-2019/videos/enhancing-credit-decision-making-for-new-applications-using-ml-and-python-by-rachita-das.json index 37e6da74c..1e74355b2 100644 --- a/pydata-delhi-2019/videos/enhancing-credit-decision-making-for-new-applications-using-ml-and-python-by-rachita-das.json +++ b/pydata-delhi-2019/videos/enhancing-credit-decision-making-for-new-applications-using-ml-and-python-by-rachita-das.json @@ -2,7 +2,6 @@ "description": "This topic discusses the details of an application scorecard, a key risk decisioning tool used across all financial institutions. The talk will cover the aspects of model development on Python leveraging machine learning algorithms. Additionally, a bokeh based interactive visualization tool will be discussed, that will enable the risk officers to take the most profitable decisions. \n\nhttps://delhi.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases. #PyDataDelhi2019 #PyData #DataScience", "duration": 1864, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "Rachita Das" diff --git a/pydata-delhi-2019/videos/fundamental-results-in-ml-and-how-to-use-them-by-jaidev-deshpande.json b/pydata-delhi-2019/videos/fundamental-results-in-ml-and-how-to-use-them-by-jaidev-deshpande.json index 8ecbc473b..68c4689f4 100644 --- a/pydata-delhi-2019/videos/fundamental-results-in-ml-and-how-to-use-them-by-jaidev-deshpande.json +++ b/pydata-delhi-2019/videos/fundamental-results-in-ml-and-how-to-use-them-by-jaidev-deshpande.json @@ -2,7 +2,6 @@ "description": "We\u2019ve all heard terms like Bayes error, perceptron learning theorem, the\nfundamental theorem of statistical learning, VC dimension, etc. This\ntalk is about using the math-heavy fundamentals of machine learning to\nunderstand the very solvability of classification problems. By the end\nof the talk, you will get a clear picture of how these ideas can be\npractically applied to classification problems.\n\nWhy does a classifier not fit? This can only happen for two reasons:\n\n- Because the model is not smart enough, or\n- Because the training data itself is not \u201cclassifiable\u201d.\n\nUnfortunately, the only obvious way to determine the *classifiability*\nor *separability* of a training dataset is to use a variety of\nclassification models with a variety of hyperparameters. In other words,\nseparability of classes in a dataset is usually expressed only in terms\nof which model worked on that dataset.\n\nUnfortunately, this does not answer the fundamental question of whether\na dataset is classifiable or not. If we keep on increasing the\ncomplexity of models and trying them out on a dataset without success,\nall we can infer from this is that the set of models we have tried out\n*so far* are incapable of learning the classification problem. It does\nnot necessarily mean that the problem is unsolvable.\n\nFortunately, many shallow learning models have been widely studied and\nare well understood. As such, it is quite possible to place theoretical\nbounds on their performance in the context of a dataset. There are a\nvariety of statistics that we can use *a priori* to determine the\nlikelihood of a model fitting a dataset.\n\nThis talk is about how we can use these results towards developing a\nstrategy, a structured approach for carrying out machine learning\nexperiments, instead of blindly running models and hoping that one of\nthem works. Starting from elementary results like Bayes theorem and the\nperceptron learning rule all the way up to complex ideas like kernel\nmethods and VC dimension, this talk develops a framework for the\nanalysis of data in the context of separability of classes.\n\nWhile the talk might sound theoretical, major focus will be on how to\nmake practical, hands-on use of these concepts to better understand your\ndata and your models. By the end of the talk, you will have learnt how\nto *prioritize* which models to use on which dataset, and how to compute\nthe likelihood of them fitting on the data. 
This rigorous analysis of\nmodels and data saves a lot of effort and money, as the talk will\ndemonstrate with real-world examples.\n", "duration": 1755, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "Jaidev Deshpande" diff --git a/pydata-delhi-2019/videos/generating-tabla-note-sequences-with-markov-chains-and-processingp5js-by-keshav-joshi.json b/pydata-delhi-2019/videos/generating-tabla-note-sequences-with-markov-chains-and-processingp5js-by-keshav-joshi.json index dc2fc5aec..00fad4e8a 100644 --- a/pydata-delhi-2019/videos/generating-tabla-note-sequences-with-markov-chains-and-processingp5js-by-keshav-joshi.json +++ b/pydata-delhi-2019/videos/generating-tabla-note-sequences-with-markov-chains-and-processingp5js-by-keshav-joshi.json @@ -2,7 +2,6 @@ "description": "A fun little web-app that generates random tabla-note sequences, using a\nJSON file, Markov chains and Processing (p5.js). All you need to\ncustomise this is your own note-sound files and note-to-note transition\nprobability distributions.\n\nIntro\n~~~~~\n\n- Concept: An intuitive, interactive exploration of tabla notes and\n patterns\n- Who I am: my background\n\nDemo\n~~~~\n\n- Listening to saved patterns, tabla samples\n- Random tabla-note generation\n- Design choices\n- how the parts connect with p5\n\nWhat/Why Markov chains\n~~~~~~~~~~~~~~~~~~~~~~\n\n- Tabla states\n- Tabla note-sequence JSON\n\nWhat is p5.js\n~~~~~~~~~~~~~\n\n- code-architecture\n\nRelated Work\n~~~~~~~~~~~~\n\n- tabla apps\n- tabla research\n\nTODO: Other sequence generators\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n- Changing how we sample from the Markov model\n- RNNs\n\nTODO: Design and new features\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n- more sliders!\n", "duration": 757, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "Keshav Joshi" diff --git a/pydata-delhi-2019/videos/how-gpu-computing-literally-saved-me-at-work-by-abhishek-mungoli.json b/pydata-delhi-2019/videos/how-gpu-computing-literally-saved-me-at-work-by-abhishek-mungoli.json index afbc9f67a..54022420c 100644 --- a/pydata-delhi-2019/videos/how-gpu-computing-literally-saved-me-at-work-by-abhishek-mungoli.json +++ b/pydata-delhi-2019/videos/how-gpu-computing-literally-saved-me-at-work-by-abhishek-mungoli.json @@ -2,7 +2,6 @@ "description": "Distributed/Parallel computing is at the heart of new technology. Every\ncompany, big or small, wants to make the most of the technology available to\nthem. One such niche technology is GPU computing. Here, I present to you\na real-world application on how GPU can save computing efforts and\nreduce the computation time from 2 days to 20 seconds. Shared is a live\napplication from the Retail domain utilizing GPU.\n\nDistributed/Parallel computing is at the heart of new technology. Every\ncompany, big or small, wants to make the most of the technology available to\nthem. One such niche technology is GPU computing. If used cautiously, it can\nsave a lot of computing effort and time across applications.\nBusinesses, with the boom in Machine learning/Deep learning techniques,\nare on the way to leveraging this technology in their day-to-day work.\nHere, I present to you a real-time application on how GPU can save\ncomputing efforts and reduce the computation time from 2 days to 20\nseconds. The talk will cover the best-case scenarios and the use case of\nimplementing GPU-based recommendations at scale. The talk will start\n
The talk will start\nwith an overview of the problem at hand, comparing CPU and GPU\nprocessing times and discussing when a GPU is the best fit for the task at\nhand or any other scenario.\n\nFor those interested in delving into the detailed code, here\u2019s the link\nto my blog post:\nhttps://medium.com/walmartlabs/how-gpu-computing-literally-saved-me-at-work-fc1dc70f48b6\n", "duration": 1544, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "Abhishek Mungoli" diff --git a/pydata-delhi-2019/videos/how-i-used-data-visualization-in-my-quest-for-the-perfect-wine-by-pranav-suri.json b/pydata-delhi-2019/videos/how-i-used-data-visualization-in-my-quest-for-the-perfect-wine-by-pranav-suri.json index f7538be87..7cc699bf3 100644 --- a/pydata-delhi-2019/videos/how-i-used-data-visualization-in-my-quest-for-the-perfect-wine-by-pranav-suri.json +++ b/pydata-delhi-2019/videos/how-i-used-data-visualization-in-my-quest-for-the-perfect-wine-by-pranav-suri.json @@ -2,7 +2,6 @@ "description": "For many, wine is a taste acquired over many years; others use data. In\nthis talk, I recount the story of how I uncovered the constituents of a\ngood wine using Data Visualization while discussing the nuances of\nExploratory Data Analysis (EDA) \u2013 the process of taking the first glance\nat data.\n\nIdea Behind The Talk\n^^^^^^^^^^^^^^^^^^^^\n\nWith the rise of tools allowing for smooth implementation of powerful\nalgorithms, it is tempting to skip EDA. However, **EDA is just as\nimportant as any part of a data project; if you don't know your data\nwell enough, you can end up doing very shallow work**, i.e., inaccurate\nmodels, choosing wrong variables, inefficient use of resources, or all\nof the above. Sometimes, EDA uncovers more than what the confirmatory\nstudy would've done otherwise.\n\nExploratory Data Analysis is what one should do when first encountering\na dataset. However, it's not a one-off process: there are setbacks,\nmultiple iterations, and the process sets the tone for a more formal\nanalysis of data in hand. **With a story-like format, the presentation\nmentions the setbacks one faces when performing a real data study.**\n\n**The motivation behind creating this talk is to impart the idea of\nExploratory Data Analysis, and how Data Visualizations help uncover\npatterns (not limited to the findings of the wine study). Moreover, I\nbelieve the format of sharing a real story with the idea of \"how to\nreach and infer from a specific plot\" would help the audience understand\ndata visualization better than talking about syntactic sugar of a\nparticular visualization library. In addition, the ideas can be further\ngeneralized to any other visualization library.**\n\nOutline of the Talk\n^^^^^^^^^^^^^^^^^^^\n\n- History of Wine & Data Science\n- Introduction to Exploratory Data Analysis (EDA)\n- Why Data Visualization? \u2013 Anscombe's Quartet\n- The Grammar of Graphics: Why I used ggplot2?\n\nWine Project: Finding Constituents of Good Red & White Wines\n''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''\n\n- About the Project & How to Replicate It?\n- How to Quantify \u2018Artistic\u2019 Measures?\n- Principles of Data Visualization to Uncover Patterns\n- Inspecting Data Using Univariate, Bivariate & Multivariate Plots\n\nAspects of EDA Not Used In the Wine Project\n'''''''''''''''''''''''''''''''''''''''''''\n\n- How to Prepare Your Dataset? 
\u2013 Data Aggregation\n- How to Remove Outliers Using Data Visualization\n- How to Decide the Best Fit During EDA\n- When to Transform Variables\n", "duration": 1171, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-04", "speakers": [ "Pranav Suri" diff --git a/pydata-delhi-2019/videos/how-not-to-ask-questions-by-raman-tehlan.json b/pydata-delhi-2019/videos/how-not-to-ask-questions-by-raman-tehlan.json index 3f1a50014..bc075f5b4 100644 --- a/pydata-delhi-2019/videos/how-not-to-ask-questions-by-raman-tehlan.json +++ b/pydata-delhi-2019/videos/how-not-to-ask-questions-by-raman-tehlan.json @@ -2,7 +2,6 @@ "description": "https://delhi.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases. #PyDataDelhi2019 #PyData #DataScience", "duration": 325, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "Raman Tehlan" diff --git a/pydata-delhi-2019/videos/image-manipulation-detection-using-neural-networks-by-sonal-kukreja.json b/pydata-delhi-2019/videos/image-manipulation-detection-using-neural-networks-by-sonal-kukreja.json index 45db4573d..ac557f9cb 100644 --- a/pydata-delhi-2019/videos/image-manipulation-detection-using-neural-networks-by-sonal-kukreja.json +++ b/pydata-delhi-2019/videos/image-manipulation-detection-using-neural-networks-by-sonal-kukreja.json @@ -2,7 +2,6 @@ "description": "In the present world of social networking, image manipulation is the\neasiest and the scariest job! This creates an urgent need for efficient\nimage manipulation detection techniques. Moving from the traditional\nimage manipulation detection techniques to the present scenario, CNN can\nbe the perfect deep learning model as the human visual cortex has the\nability to detect tampered areas in an image!\n\nWith the advent of social networking services like Facebook and\nInstagram in the past few decades, the sharing of digital images has\nsubstantially increased. Also, these images can be easily modified these\ndays by using easily available image processing software like Adobe\nPhotoshop. These modified images are used for fake news, mob incitement,\netc. Hence there is a crucial need for image authentication schemes that\ncan verify if the image is authentic or manipulated. Earlier, most of\nthe research in this direction has been done in pixel based, format\nbased, camera based, physics based and geometry based schemes. All these\nschemes work upon the visual information of the image. Recently, CNNs,\nwhich are inspired by the visual cortex, came into the picture. It has\nbeen observed that the human visual cortex can detect tampered areas\nin an image. Thus CNN can be the perfect deep learning model for this\njob! 
Content to be discussed:\n\n- Traditional Image Manipulation Detection Techniques\n- Various datasets available for Image Manipulation Detection\n- Experimentation\n- Learning Rich Features for Image Manipulation Detection\n- CNN based Image Manipulation Detection Techniques\n", "duration": 960, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-04", "speakers": [ "Sonal Kukreja" diff --git a/pydata-delhi-2019/videos/improving-model-lifecycle-management-for-machine-learning-models-by-pankaj-gupta.json b/pydata-delhi-2019/videos/improving-model-lifecycle-management-for-machine-learning-models-by-pankaj-gupta.json index 88257f5c2..d6dc9ca8b 100644 --- a/pydata-delhi-2019/videos/improving-model-lifecycle-management-for-machine-learning-models-by-pankaj-gupta.json +++ b/pydata-delhi-2019/videos/improving-model-lifecycle-management-for-machine-learning-models-by-pankaj-gupta.json @@ -2,7 +2,6 @@ "description": "This talk presents a systematic approach to improve model lifecycle\nmanagement for machine learning models, using Python/DevOps tools and\nprocesses to enable self-served model development and deployment; a\nfour-step workflow guides the developer/modeler to develop, automate,\ntest and implement the model in a virtual environment in a\nproduction-ready manner.\n\nStatistical/machine-learning modeling is often subject to regulatory\nmandates on model development, validation, deployment and monitoring.\nThese mandates are liable to require that production models are\nfit-for-purpose, are accurately implemented and comply with local laws and\nregulations. However, in many environments, complying with such mandates is\nextremely onerous and time-consuming, e.g. due to the use of legacy\ntechnologies and workflows or unstructured model development by data\nscientists.\n\nThis talk presents a systematic approach to this challenge, using\nPython/DevOps tools and processes to enable self-served model\ndevelopment and deployment; a four-step workflow guides the modeler to\ndevelop, automate, test and implement the model in a virtual environment\nin a production-ready manner. The approach is in use by the Quantitative\nAnalytics team at Barclays bank, but we believe that the issues and\npotential solutions are relevant to other regulated industries.\n", "duration": 1600, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-04", "speakers": [ "Pankaj Gupta" diff --git a/pydata-delhi-2019/videos/intelligent-recruitment-using-nlp-and-ml-to-identify-the-most-suitable-candidates-by-victor-robin.json b/pydata-delhi-2019/videos/intelligent-recruitment-using-nlp-and-ml-to-identify-the-most-suitable-candidates-by-victor-robin.json index 894ceb6cb..6fa190ba2 100644 --- a/pydata-delhi-2019/videos/intelligent-recruitment-using-nlp-and-ml-to-identify-the-most-suitable-candidates-by-victor-robin.json +++ b/pydata-delhi-2019/videos/intelligent-recruitment-using-nlp-and-ml-to-identify-the-most-suitable-candidates-by-victor-robin.json @@ -2,7 +2,6 @@ "description": "It is very common to get 100s of applicants for a role, making it\ndifficult for recruiters to shortlist the most relevant\ncandidates. We present here a solution developed at dunnhumby that\nleverages Natural Language Processing to assist our recruitment team. 
By\nsimply providing a reference resume, our tool helps identify similar\ncandidates, letting us focus on the right ones.\n\nThe talk presents a real-life example of application of machine learning\nand natural language processing for the industry.\n\nFor every role we advertise, we get 100s if not 1000s of applicants.\nShortlisting the most relevant candidates therefore becomes very\ndifficult, and no recruitment team has the capacity and time to go\nthrough all the CVs of the candidates who applied for a role. It also\nincreases the chances of missing the ideal candidate. We present here a\nsolution developed internally that leverages natural language processing\nto shortlist the most relevant candidates.\n\nVery often, the hiring manager provides the recruitment team with a\nreference CV of the ideal profile he or she is looking for. The way we\nhave designed the tool is as follows: The text from all the CVs is\nextracted and converted into a vector format using Word2Vec. This\ngenerates a vector representation of each candidate\u2019s CV, which is then\ncompared using cosine similarity with the vector of the reference CV.\nThis returns the CVs that are most similar to our reference profile.\n\nThe methodology has proven to give good results, and is particularly\nefficient at shortlisting the most promising candidates. It still does not\nreplace an actual interview, but it has the potential to greatly reduce the\nhiring time and ensure only the most promising candidates are\ninterviewed, saving time for both the manager and the candidates.\n", "duration": 1095, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "Victor Robin" diff --git a/pydata-delhi-2019/videos/james-powell-because-you-cant-run-you-cant-hide-some-musings-on-api-design-pydata-delhi-2019.json b/pydata-delhi-2019/videos/james-powell-because-you-cant-run-you-cant-hide-some-musings-on-api-design-pydata-delhi-2019.json index 0af3b995c..7cef1c4d8 100644 --- a/pydata-delhi-2019/videos/james-powell-because-you-cant-run-you-cant-hide-some-musings-on-api-design-pydata-delhi-2019.json +++ b/pydata-delhi-2019/videos/james-powell-because-you-cant-run-you-cant-hide-some-musings-on-api-design-pydata-delhi-2019.json @@ -2,7 +2,6 @@ "description": "Because You Can't Run, You Can't Hide: Some Musings on API Design by James Powell @ PyData Delhi 2019\n\nhttps://delhi.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases. 
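Looking back at the dunnhumby CV-matching approach described earlier, a minimal sketch of the ranking step (assuming each CV has already been reduced to an averaged Word2Vec vector; all names and shapes here are illustrative, not the authors' code):

::

    import numpy as np

    def rank_candidates(reference_vec, candidate_vecs):
        # Normalise so that a dot product equals cosine similarity.
        ref = reference_vec / np.linalg.norm(reference_vec)
        cands = candidate_vecs / np.linalg.norm(candidate_vecs, axis=1, keepdims=True)
        sims = cands @ ref                   # cosine similarity per candidate
        return np.argsort(sims)[::-1], sims  # best-matching candidates first

    # Toy usage with random 100-dimensional "embeddings".
    rng = np.random.default_rng(0)
    order, sims = rank_candidates(rng.normal(size=100), rng.normal(size=(50, 100)))
    print(order[:5], sims[order[:5]])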
#PyDataDelhi2019 #PyData #DataScience", "duration": 2735, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "James Powell" diff --git a/pydata-delhi-2019/videos/knowledge-graph-made-simple-using-nlp-and-transfer-learning-by-suyog-swami.json b/pydata-delhi-2019/videos/knowledge-graph-made-simple-using-nlp-and-transfer-learning-by-suyog-swami.json index b86e3f014..c3ba36372 100644 --- a/pydata-delhi-2019/videos/knowledge-graph-made-simple-using-nlp-and-transfer-learning-by-suyog-swami.json +++ b/pydata-delhi-2019/videos/knowledge-graph-made-simple-using-nlp-and-transfer-learning-by-suyog-swami.json @@ -2,7 +2,6 @@ "description": "In 30 years, dunnhumby has built a huge knowledge base. GRAKN.AI is a\npowerful Knowledge Graph database that provides automated reasoning to\nconnect information together and derive powerful insights. The challenge\nis that it has its own query language. We present here a solution based\non NLP and transfer learning that converts any question into a GRAKN\nquery, making its content accessible to all.\n\n**dunnhumby**, the world\u2019s first data science platform, has 400+\nData Scientists, delivering 1000+ projects yearly using 50+ analytical\nsolutions. This huge knowledge base has an abundance of information\nwhich can be explored to extract useful insights to help employees and\nstakeholders better their processes and progress in the right direction.\n\n**GRAKN**: GRAKN.AI is a knowledge graph, a database to organise complex\nnetworks of data and make them queryable by performing knowledge\nengineering. Grakn provides the knowledge foundation for cognitive and\nintelligent (e.g. AI) systems, by offering an intelligent language for\nmodelling, transactions and analytics.\n\n**Graql**: GRAKN\u2019s query language provides an enhanced\nentity-relationship schema to model complex datasets. It performs\nlogical inference through entity and relationship type deductive\nreasoning, as well as rule-based reasoning. This allows the discovery of\nfacts and patterns that would otherwise be too hard to find.\n\n**The issue**: As mentioned above, GRAKN has its own query language\ncalled Graql. Na\u00efve users cannot efficiently leverage the power of GRAKN\ndirectly due to a lack of technical knowledge.\n\nAn example of a query to find an expert at the algorithm XGBoost:\n\n::\n\n match \n $expert isa employee, has name $name_expert;\n $algorithm isa algorithm, has name='XGBoost';\n ($expert, $algorithm) isa leverage;\n get $name_expert;\n\nBut what if they could express their question or request in natural\nlanguage?\n\n**The opportunity**: We have developed a solution which converts\nnatural language text into GRAKN queries and acts as a precursor to the\nGRAKN input interface, making knowledge graphs explorable to users\nwho want to focus on insights rather than technicalities.\n\n**Our Solution**: The algorithm is explained below:\n\n1. Take input text string\n2. Identify nouns and verbs from the string by using Word2Vec and\n Transfer Learning\n3. Infer the nouns in the entity list available in the knowledge database\n4. Similarly, infer verbs in the relations list available in the knowledge\n database\n5. Also, match exact entity values with the nouns in the database.\n6. 
Using the collection of similar entities and relations, generate a match\nquery containing those entities and relations.\n\nE.g.\n\n*User*\n\n::\n\n \u201cTell me who is an expert at \u2018XGBoost\u2019 \u201d\n\n*Text2GQL*\n\n::\n\n match \n $expert isa employee, has name $name_expert;\n $algorithm isa algorithm, has name='XGBoost';\n ($expert, $algorithm) isa leverage;\n get $name_expert;\n\n**Future enhancement**: The idea is to make the tool generic: it takes a\nschema, a data source (json) and an input string, and generates a grakn query\nwhich would be used as an input to the grakn interface. We can also use a\nsimilar approach to automate the process of parsing documents and identifying\nentities and relations which could be inserted into the graph database.\n", "duration": 1483, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-04", "speakers": [ "Suyog Swami" diff --git a/pydata-delhi-2019/videos/opening-notes-pydata-delhi-2019.json b/pydata-delhi-2019/videos/opening-notes-pydata-delhi-2019.json index 707d5b553..193911fcf 100644 --- a/pydata-delhi-2019/videos/opening-notes-pydata-delhi-2019.json +++ b/pydata-delhi-2019/videos/opening-notes-pydata-delhi-2019.json @@ -2,7 +2,6 @@ "description": "Conference deemed open by the Conference Chair Mr. Sanket Verma @PyData Delhi 2019\n\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases. #PyDataDelhi2019 #PyData #DataScience", "duration": 407, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "Various speakers" diff --git a/pydata-delhi-2019/videos/panel-discussion-pydata-delhi-2019.json b/pydata-delhi-2019/videos/panel-discussion-pydata-delhi-2019.json index 8b6036935..3042d2e56 100644 --- a/pydata-delhi-2019/videos/panel-discussion-pydata-delhi-2019.json +++ b/pydata-delhi-2019/videos/panel-discussion-pydata-delhi-2019.json @@ -2,7 +2,6 @@ "description": "The Panel Discussion at PyData Delhi 2019. The speakers provided diverse perspectives on topics such as women in tech and a comparison between the oriental and the western education systems. \n\nhttps://delhi.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases. #PyDataDelhi2019 #PyData #DataScience", "duration": 2940, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-04", "speakers": [ "Various speakers" diff --git a/pydata-delhi-2019/videos/predicting-real-time-transaction-fraud-using-python-and-spark-by-mayank-jain.json b/pydata-delhi-2019/videos/predicting-real-time-transaction-fraud-using-python-and-spark-by-mayank-jain.json index 7ee4f82f6..d602b9041 100644 --- a/pydata-delhi-2019/videos/predicting-real-time-transaction-fraud-using-python-and-spark-by-mayank-jain.json +++ b/pydata-delhi-2019/videos/predicting-real-time-transaction-fraud-using-python-and-spark-by-mayank-jain.json @@ -2,7 +2,6 @@ "description": "Predicting transaction fraud in real-time is an important challenge due\nto large data size, an imbalanced target class, ever-changing fraud MOs &\nstrict requirements for prediction inference speed, a challenge which machine\nlearning models can help to solve. Using open source technologies & distributed\ncomputing, Barclays has been developing solutions to reduce fraud losses\nand limit adverse customer experience.\n\nTransaction Fraud Model Development\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\nPredicting transaction fraud of debit and credit card payments in\nreal-time is an important challenge, which new technologies and\nstate-of-the-art supervised machine learning models can help to solve. While\ndifferent supervised learning techniques, like Logistic Regression and\nNeural Networks, have been used for many years, recent developments in\nDeep Learning, Gradient Boosted Machines, and Recurrent Neural Networks,\nhave opened up a wealth of options that can provide significant\nimprovements over the existing models.\n\nAdvantages of Distributed Computing\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\nWhile the transaction volumes are humongous (billions of transactions per\nyear), non-distributed packages like numpy or pandas easily run out of\nmemory. Distributed computing solves this problem. Spark serves as a\nsolution for raw data processing, data quality and reconciliation and,\nmost importantly, feature engineering, where thousands of features are\nbeing created and tested.\n\nReal Challenges with fraud data\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\nMachine Learning techniques are in general well-suited for transaction\nfraud; however, large data volumes (billions of transactions per year),\na very imbalanced target class (rare events), ever-changing fraud MOs, and\nstrict requirements for the prediction inference speed mean that some\nmethods are better suited than others. With the help of open source\ntechnologies like Python and distributed computing using Spark, Barclays\nhas been developing and testing different solutions to reduce fraud\nlosses and limit adverse customer experience.\n\nThe main emphasis of the talk is to show how to train supervised\ntransaction fraud models that can be implemented and how these models\nboth improve customer experience and help to reduce fraud losses. 
The\npresentation will show results of a machine learning model that is\noperating in production.\n\nThe audience will learn:\n\n- how real-time transaction fraud models work and the main challenges\n in transaction fraud modelling\n- how distributed computing can be used to advantage\n- which supervised machine learning techniques are most applicable\n", "duration": 1501, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "Mayank Jain" diff --git a/pydata-delhi-2019/videos/research-midas-iiit-d-by-hitkul.json b/pydata-delhi-2019/videos/research-midas-iiit-d-by-hitkul.json index 0bed8a51e..cffeb0b09 100644 --- a/pydata-delhi-2019/videos/research-midas-iiit-d-by-hitkul.json +++ b/pydata-delhi-2019/videos/research-midas-iiit-d-by-hitkul.json @@ -2,7 +2,6 @@ "description": "Research @ MIDAS, IIIT-D by Hitkul \n\nhttps://delhi.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases. #PyDataDelhi2019 #PyData #DataScience", "duration": 805, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "Hitkul" diff --git a/pydata-delhi-2019/videos/the-power-of-data-science-to-measure-unmeasured-parameters-in-emerging-markets-by-prukalpa-sankar.json b/pydata-delhi-2019/videos/the-power-of-data-science-to-measure-unmeasured-parameters-in-emerging-markets-by-prukalpa-sankar.json index 854faf8b3..9d957595d 100644 --- a/pydata-delhi-2019/videos/the-power-of-data-science-to-measure-unmeasured-parameters-in-emerging-markets-by-prukalpa-sankar.json +++ b/pydata-delhi-2019/videos/the-power-of-data-science-to-measure-unmeasured-parameters-in-emerging-markets-by-prukalpa-sankar.json @@ -2,7 +2,6 @@ "description": "PyData Delhi 2019 Keynote - The power of data science to measure unmeasured parameters in Emerging Markets by Prukalpa Sankar.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases. 
#PyDataDelhi2019 #PyData #DataScience", "duration": 1507, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "Prukalpa Sankar" diff --git a/pydata-delhi-2019/videos/transfer-learning-in-natural-language-processing-by-janu-verma.json b/pydata-delhi-2019/videos/transfer-learning-in-natural-language-processing-by-janu-verma.json index 382bd0840..9627621b2 100644 --- a/pydata-delhi-2019/videos/transfer-learning-in-natural-language-processing-by-janu-verma.json +++ b/pydata-delhi-2019/videos/transfer-learning-in-natural-language-processing-by-janu-verma.json @@ -2,7 +2,6 @@ "description": "Transfer learning refers to the methods that leverage a trained model in\none domain to achieve better results on tasks in a related domain, i.e.,\nwe transfer the knowledge gained in one domain to a new domain. This\ntalk is centered on recent developments in deep learning to facilitate\ntransfer learning in NLP. We will discuss the transformer architecture\nand its extensions like GPT and BERT.\n\nThe classic supervised machine learning paradigm is based on learning in\nisolation a single predictive model for a task using a single dataset.\nThis approach requires a large number of training examples and performs\nbest for well-defined and narrow tasks. Transfer learning refers to the\nmethods that leverage a trained model in one domain to achieve better\nresults on tasks in a related domain. Models thus trained also show\nbetter generalization properties.\n\nComputer vision has seen great success with transfer learning: models\ntrained on the ImageNet data have been 'fine-tuned' to achieve\nstate-of-the-art results in many other problems. In the last two years, NLP has\nalso witnessed the emergence of several transfer learning methods and\narchitectures, which significantly improved upon the state-of-the-art on\na wide range of NLP tasks.\n\nWe will present an overview of modern transfer learning methods in NLP,\nhow models are pre-trained, what information the representations they\nlearn capture, and review examples and case studies on how these models\ncan be integrated and adapted in downstream NLP tasks.\n", "duration": 2256, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "Janu Verma" diff --git a/pydata-delhi-2019/videos/understanding-opacity-in-machine-learning-models-by-ankit-rathi-yatin-bhatia.json b/pydata-delhi-2019/videos/understanding-opacity-in-machine-learning-models-by-ankit-rathi-yatin-bhatia.json index 2ea3892fa..77b45bdd0 100644 --- a/pydata-delhi-2019/videos/understanding-opacity-in-machine-learning-models-by-ankit-rathi-yatin-bhatia.json +++ b/pydata-delhi-2019/videos/understanding-opacity-in-machine-learning-models-by-ankit-rathi-yatin-bhatia.json @@ -2,7 +2,6 @@ "description": "Opacity is one of the biggest challenges in machine learning/deep\nlearning solutions in the real world. Any basic deep learning model can\ncontain dozens of hidden layers and millions of neurons interacting with\neach other. Explaining deep learning model solutions can be a bit\nchallenging. Our proposal explains some approaches that can help make\nML/DL models more interpretable.\n\nModel Interpretability Background\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nData Science/AI models are still often perceived as a black box capable\nof performing magic. 
As we are solving more complex problems using\nadvanced algorithms, the situation is such that the more sophisticated the\nmodel, the lower the explainability level.\n\nWithout a reasonable understanding of how a DS/AI model works, real-world\nprojects rarely succeed. Also, the business may not know the intricate\ndetails of how a model might work, and as the model will be making a lot of\ndecisions for them in the end, they do have a right to pose the\nquestion.\n\nThere are a lot of real-world scenarios where biased models might have really\nadverse effects, e.g. predicting potential criminals\n(https://www.propublica.org/article/machine-bias-risk-assessments-in-criminal-sentencing), judicial sentencing risk scores\n(https://www.propublica.org/article/making-algorithms-accountable),\ncredit scoring, fraud detection, health assessment, loan lending, and\nself-driving.\n\nMany researchers are actively working on making DS/AI models\ninterpretable (Skater, ELI5, SHAP, etc.).\n\nWhy is Model Interpretability important?\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nDS/AI models are used to make critical decisions on behalf of the business.\nFor the decisions taken by DS/AI models, the business needs to cover these\nthree aspects as well:\n\n- Fairness - How fair are the predictions? What drives model\n predictions?\n- Accountability - Why did the model take a certain decision?\n- Transparency - How can we trust model predictions?\n\nHow to make models interpretable?\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nIn order to make models interpretable, the following approaches/techniques\ncan be used:\n\n1. Feature Importance\n2. Partial Dependence Plot\n3. SHAP Values\n4. LIME\n5. Skater\n\nLet\u2019s have a look at these approaches/techniques one by one:\n\n1. Feature Importance\n^^^^^^^^^^^^^^^^^^^^^\n\nFor machine learning models like XGBoost and Random Forest, feature\nimportance helps business analysts draw logical conclusions.\n\nWe measure the importance of a feature by calculating the increase in\nthe model\u2019s prediction error after permuting the feature. A feature is\n\u201cimportant\u201d if shuffling its values increases the model error, because\nin this case the model relied on the feature for the prediction. A\nfeature is \u201cunimportant\u201d if shuffling its values leaves the model error\nunchanged, because in this case the model ignored the feature for the\nprediction.\n\n.. figure:: https://lh6.googleusercontent.com/6QlWI_TX3B40v5uvcwB3A0ADF3y4JDNUEJFtaRMCoCdn7QouTqB4M4bgTPzukoXT5PN4YAnphqqavM_yreeHCI1ObwYZqnHmeYn9AGhtkC-1zCmb9W55mhdqS66J3quq9DeRS8FE\n :alt: \n\n2. Partial Dependence Plot\n^^^^^^^^^^^^^^^^^^^^^^^^^^\n\nPartial dependence plots show how a feature affects predictions. Partial\ndependence plots (PDP) show the dependence between the target response\nand a set of \u2018target\u2019 features, marginalizing over the values of all\nother features (the \u2018complement\u2019 features). Intuitively, we can\ninterpret the partial dependence as the expected target response as a\nfunction of the \u2018target\u2019 features.\n\n.. figure:: https://lh3.googleusercontent.com/SpyncU_BRXeMhocCaird59qXmIoLGISyPOQA1KEqj_IUHYxP58yu4yZuMwGL5C1VOWvHl_UOgvK7VgRzCuOh9OhAxqk7cZZodut9CaygiWWvxLcBYLFWQQ_L0iHMUugv5DrbA8Xc\n :alt: \n\n3. 
SHAP (SHapley Additive exPlanations) Values\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\nSHAP Values break down a prediction to show the impact of each feature.\nThese are the scenarios where we need this technique:\n\n- A model says a bank shouldn't loan someone money, and the bank is\n legally required to explain the basis for each loan rejection.\n- A healthcare provider wants to identify what factors are driving each\n patient's risk of some disease so they can directly address those\n risk factors with targeted health interventions.\n- |image0|\n\nWe predicted 0.7, whereas the base\\_value is 0.4979. Feature values\ncausing increased predictions are in pink, and their visual size shows\nthe magnitude of the feature's effect. Feature values decreasing the\nprediction are in blue. The biggest impact comes from Goal Scored being\n2, though the ball possession value has a meaningful effect decreasing\nthe prediction.\n\nThe SHAP package has explainers for every type of model:\n\n- shap.DeepExplainer works with Deep Learning models.\n- shap.KernelExplainer works with all models, though it is slower than\n other Explainers and it offers an approximation rather than exact Shap\n values.\n\n4. LIME (Local Interpretable Model-Agnostic Explanations)\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\nLIME (https://github.com/marcotcr/lime) can be used on anything from a\npolynomial regression model to a deep neural network.\n\nLIME\u2019s approach is to perturb most of the features of a single\nprediction instance \u2014 essentially zeroing-out these features \u2014 and then\nto test the resulting output. By running this process repeatedly, LIME\nis able to determine a linear decision boundary for each feature\nindicating its predictive importance (e.g. which pixels contributed the\nmost to the classification of a specific image).\n\nInterpretation of LIME:\n\n- Local - Local refers to local fidelity - i.e., we want the\n explanation to really reflect the behaviour of the classifier\n \"around\" the instance being predicted.\n\n- Interpretable - LIME explains the output of classifiers in a way that is\n interpretable by humans, e.g. representing words for a model\n which is built on word embeddings.\n\n- Model Agnostic - LIME is able to explain a machine learning model\n without deep knowledge of its internals.\n\n- Explanation - LIME explanations are kept short so that they remain\n easy for humans to understand.\n\n.. figure:: https://lh4.googleusercontent.com/JmXlS0qJNYOvbLlmA53X42_WIGHp9uzDCItBtGpmPM8YHqgqlYzJ077VU0EjNVna6LNZHvgFHRWry6c_CUMCZ_-%20WnoZh2F3RoLE4Xalh_aimWw8QDkLFPzxPYjLtCZ8Ws7DZzPcW\n :alt: \n\n.. figure:: https://lh3.googleusercontent.com/g-nAKqqfemQR17DhBKzdYUDQJQYo7Q54Nzyf4rtTNInn8ZyI16l9VM8LmfaAclj40v5IhZHserrJY-%20qR-gA5_r6bwWlIat24sjdiuW085pkggHgrOgSbq_VQzZJnht-FyHChp9Zr\n :alt: \n\n5. Skater\n^^^^^^^^^\n\nSkater is a Python library designed to demystify the inner workings of\ncomplex or black-box models. Skater uses a number of techniques,\nincluding partial dependence plots and local interpretable model\nagnostic explanation (LIME), to clarify the relationships between the\ndata a model receives and the outputs it produces.\n\n.. 
|image0| image:: https://lh5.googleusercontent.com/lWsT9o5da1242Caaqqj66lWpY9yND6vEy4_3eT4dY_5Juyysnv3ZE4etya20rQMGzJ5E5PgJNUP05lLQZCuDUiAC0dfPlWjwZq-1m2p8SBylGDytFYRQCSBilE6pBVl7kRdjcdpV\n\n", "duration": 1493, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-03", "speakers": [ "Ankit Rathi", diff --git a/pydata-delhi-2019/videos/understanding-user-churn-in-gopay-by-sruthi-and-karthik-vijaykumar.json b/pydata-delhi-2019/videos/understanding-user-churn-in-gopay-by-sruthi-and-karthik-vijaykumar.json index d70b0b73a..4c3e335b5 100644 --- a/pydata-delhi-2019/videos/understanding-user-churn-in-gopay-by-sruthi-and-karthik-vijaykumar.json +++ b/pydata-delhi-2019/videos/understanding-user-churn-in-gopay-by-sruthi-and-karthik-vijaykumar.json @@ -2,7 +2,6 @@ "description": "How to identify customers who are dissatisfied with your product? How to\nunderstand what churning users need & provide them the right incentives?\nAt GoPay we have created a marketing channel using incentive offers to\nbring back churning users. The key elements of our strategy are: 1.\nIdentifying users who are most likely to churn 2. Providing the right\nincentives for the churning population\n\nIn this talk we will discuss our experience at GoPay on using incentives\nto win some churners back. The talk will have 4 components:\n\n1. Defining churn mathematically\n2. Modelling probability of a user churning in a given timeframe\n3. Finding the right incentives to bring back churned users\n4. Software infrastructure for the end-to-end process\n\nWe will also discuss some next steps we are developing:\n\n1. Experimentation framework to test custom incentive structures\n2. Moving from a fixed timeframe to a continuous monitoring system\n", "duration": 1587, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-04", "speakers": [ "Karthik Vijayakumar", diff --git a/pydata-delhi-2019/videos/using-nlp-for-disaster-management-by-kaustubh-hiware.json b/pydata-delhi-2019/videos/using-nlp-for-disaster-management-by-kaustubh-hiware.json index 7a4e52985..7c4a70691 100644 --- a/pydata-delhi-2019/videos/using-nlp-for-disaster-management-by-kaustubh-hiware.json +++ b/pydata-delhi-2019/videos/using-nlp-for-disaster-management-by-kaustubh-hiware.json @@ -2,7 +2,6 @@ "description": "During disasters, it is extremely crucial that the right resources\nreach the victims in time. Disaster relief NGOs revealed that\nthere is often mismanagement and lack of coordination. Also, often\nidentifying the right resources takes up a lot of crucial time. To aid this,\nwe developed an algorithm that identifies locations from microblogs,\nup to 100x faster than the SoTA StanfordNLP.\n\nWe first developed an algorithm to identify locations from microblogs in\na real-time situation, which was 100 times faster than the state-of-the-art\nStanfordNLP. The proposed algorithm is also faster than other tools. We\nused NLP tools like dependency parsing, Named Entity Recognition, and\nother rules to identify the location. This resulted in a research\npaper at WWW 2018, The Web Conference, held in France. To further assist disaster\nrelief attempts, we developed a platform that could identify crucial\ninformation like resources, location, quantity, etc., thus effectively\nusing social media to aid disaster mitigation. 
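The talk's own location-extraction algorithm combines dependency parsing with custom rules for speed; as a hedged illustration of just the generic NER step it builds on, here is a minimal sketch (assumes spaCy with its small English model installed; not the authors' code):

::

    import spacy

    # Generic NER-based location extraction. The custom dependency-parsing
    # rules that make the talk's approach fast are not reproduced here.
    nlp = spacy.load("en_core_web_sm")

    def extract_locations(post_text):
        # GPE covers countries/cities/states; LOC covers other locations.
        doc = nlp(post_text)
        return [ent.text for ent in doc.ents if ent.label_ in ("GPE", "LOC")]

    print(extract_locations("Need drinking water and food packets near Chennai Central"))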
The proposed platform\nfinished first out of 242 participants in Microsoft's code.fun.do Hackathon,\nand was one of 21 student projects to be demonstrated on a\nnational level at Microsoft's AXLE, 2019.\n", "duration": 1550, "language": "eng", - "published_at": "2019-10-17T18:19:41.000Z", "recorded": "2019-08-04", "speakers": [ "Kaustubh Hiware" diff --git a/pydata-la-2019/videos/adrien-treuille-turn-python-scripts-into-beautiful-ml-tools-pydata-la-2019.json b/pydata-la-2019/videos/adrien-treuille-turn-python-scripts-into-beautiful-ml-tools-pydata-la-2019.json index fd65f3aca..8d48bcd51 100644 --- a/pydata-la-2019/videos/adrien-treuille-turn-python-scripts-into-beautiful-ml-tools-pydata-la-2019.json +++ b/pydata-la-2019/videos/adrien-treuille-turn-python-scripts-into-beautiful-ml-tools-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "This tutorial will discuss the use of internal machine learning tooling\nin real self-driving car projects. We'll download a human-annotated\nimage dataset from the Udacity Self-Driving Car Project and discuss how\nstakeholders would approach this data in a real company. Finally, we will\nlive-code an app using Streamlit to semantically search and visualize\nthe dataset and run models against it.\n\nWe've all seen poor tooling slow down data science and machine learning\nprojects. In fact, most projects develop their own unique ecosystem of\nbug-ridden and unmaintainable internal tools to analyze data, often\nthrough a patchwork of Jupyter Notebooks and Flask apps.\n\nIn this workshop, we'll discover a new workflow to write ML tools as\nPython scripts using Streamlit, the first app framework for ML\nengineers.\n\nPart one will be a whirlwind tour of Streamlit, creating apps, UIs, and\ndata caches. Then we'll download a human-annotated image dataset from\nthe Udacity Self-Driving Car Project and explore it.\n\nPart two will be about product management. We'll discuss how\nstakeholders would approach this data in a real self-driving car\nproject. How would a machine learning engineer or product manager want\nto understand this data? We'll then live-code a Streamlit app to\nfacilitate their needs.\n\nPart three will get nerdier! We'll integrate an object detection model\n(YOLO v3) into our app to explore the potential tooling benefits from\ninteractive inference.\n\nAt the end of the workshop you will have (1) a beautiful demo to show\noff to friends, and (2) a new weapon to tackle tooling problems in your\nown projects.\n\nSee the GitHub: https://github.com/streamlit/demo-self-driving\n", "duration": 4130, "language": "eng", - "published_at": "2019-12-29T22:27:04.000Z", "recorded": "2019-12-03", "speakers": [ "Adrien Treuille" diff --git a/pydata-la-2019/videos/amy-tzu-yu-chen-whats-data-science-reporting-pydata-la-2019.json b/pydata-la-2019/videos/amy-tzu-yu-chen-whats-data-science-reporting-pydata-la-2019.json index b25972299..ca9df50aa 100644 --- a/pydata-la-2019/videos/amy-tzu-yu-chen-whats-data-science-reporting-pydata-la-2019.json +++ b/pydata-la-2019/videos/amy-tzu-yu-chen-whats-data-science-reporting-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "We do great things with data science models but often forget that it is\nas important to advocate and maintain model health. Building data\nscience reports ahead of time can help. In this talk, let\u2019s talk about\nhow to make useful data science reports for data scientists and business\nstakeholders. 
You do not need to be an R user to follow along, but you\nwill see some useful R packages and tricks!\n\nIn this talk, we will first walk through what data science reporting\nmeans and how it is different from BI and general-purpose dashboards.\nNext, we explore different options for producing data science reports in\na \u201cloud\u201d and clear way for data scientists and other technical and\nnon-technical business stakeholders. Throughout the talk, I will\nintroduce some useful R packages and tricks for building your own\nreports, however, you do not need to be an R user because the same\nprinciples apply. The talk is marked as intermediate but all levels are\nwelcome and should be able to follow along.\n", "duration": 1838, "language": "eng", - "published_at": "2019-12-23T21:02:44.000Z", "recorded": "2019-12-04", "speakers": [ "Amy Tzu-Yu Chen" diff --git a/pydata-la-2019/videos/ana-castro-salazar-pasha-stetsenko-intro-to-data-analysis-with-python-data-table-pydata-la-2019.json b/pydata-la-2019/videos/ana-castro-salazar-pasha-stetsenko-intro-to-data-analysis-with-python-data-table-pydata-la-2019.json index 6ca7ad697..6bd2b1d66 100644 --- a/pydata-la-2019/videos/ana-castro-salazar-pasha-stetsenko-intro-to-data-analysis-with-python-data-table-pydata-la-2019.json +++ b/pydata-la-2019/videos/ana-castro-salazar-pasha-stetsenko-intro-to-data-analysis-with-python-data-table-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "In this tutorial, an introduction to Data Analysis with Python\ndatatable, one will learn about data wrangling with datatable via a\nbanking loan scenario using a subset of the Fannie Mae and Freddie Mac\ndatasets. We will show how to munge loan-level data, obtain basic\ninsights, and perform exploratory data analysis, model development, and model\nevaluation.\n\nDuring the tutorial session, we would use a banking loan scenario using\na subset of the Fannie Mae and Freddie Mac datasets where we would show\nhow to munge loan-level data. Additionally, we would give an overview of\nhow **Python datatable** is used to obtain basic insights that start\nwith data wrangling, exploratory data analysis, model development, and\nmodel evaluation.\n\n**Python datatable** is a library that implements a wide (and growing)\nrange of operators for manipulating two-dimensional data frames. It\nfocuses on: big data support, high performance, both in-memory and\nout-of-memory datasets, and multithreaded algorithms. 
Datatable\u2019s\npowerful API is similar to R data.table\u2019s, and it strives to provide a\nfriendlier and more intuitive API experience with helpful error messages to\naccelerate problem-solving.\n\n**Learn more about Python datatable:**\nhttps://github.com/h2oai/datatable\n\nPrerequisites\n~~~~~~~~~~~~~\n\n- Basic knowledge of Statistics and Machine Learning\n- Basic knowledge of Python\n- JupyterLab\n- Python datatable installed on your local machine or use cloud env:\n\n - datatable can be installed by following:\n https://datatable.readthedocs.io/en/latest/install.html\n\n**Note:** As of now, datatable is only supported on Linux and Mac OS X.\nHowever, one can use it on Windows via a docker container.\n\nTutorial:\n~~~~~~~~~\n\n- **Task 0:** Introduction to Python datatable (10 min)\n- **Task 1:** datatable vs Pandas (10 mins)\n- **Task 2:** Understand the dataset (10 mins)\n- **Task 3:** datatable - Data Wrangling (10 mins)\n- **Task 4:** datatable - Exploratory Data Analysis (10 mins)\n- **Task 5:** datatable - Model Development (10 mins)\n- **Task 6:** datatable - Model Evaluation (10 mins)\n- **Task 7:** Q&A (10 - 15 mins)\n", "duration": 4558, "language": "eng", - "published_at": "2019-12-23T21:02:30.000Z", "recorded": "2019-12-03", "speakers": [ "Ana Castro Salazar", diff --git a/pydata-la-2019/videos/avik-das-dynamics-programming-for-machine-learning-hidden-markov-models-pydata-la-2019.json b/pydata-la-2019/videos/avik-das-dynamics-programming-for-machine-learning-hidden-markov-models-pydata-la-2019.json index fdabc81af..15b61a2b6 100644 --- a/pydata-la-2019/videos/avik-das-dynamics-programming-for-machine-learning-hidden-markov-models-pydata-la-2019.json +++ b/pydata-la-2019/videos/avik-das-dynamics-programming-for-machine-learning-hidden-markov-models-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Dynamic programming turns up in many machine learning algorithms, maybe\nbecause dynamic programming excels at solving problems involving\n\"non-local\" information. I explore one technique used in machine\nlearning, Hidden Markov Models, and how dynamic programming is used when\napplying this technique. Then, I'll show a few real-world examples where\nHidden Markov Models are used.\n\nA Hidden Markov Model deals with inferring the state of a system given\nsome unreliable or ambiguous observations from that system. One\nimportant characteristic of this system is that the state of the system\nevolves over time, producing a sequence of observations along the way.\nBy incorporating some domain-specific knowledge, it\u2019s possible to take\nthe observations and work backwards to a maximally plausible ground\ntruth.\n\nThis talk explores Hidden Markov Models in three steps:\n\n- First, I define Hidden Markov Models and how they apply to machine\n learning problems.\n\n- Next, I build up an understanding of the Viterbi algorithm, used to\n infer the state of the system given a sequence of observations. This\n involves some basic math, but the goal is to form an intuition for\n the algorithm. Some sample Python code is presented to demonstrate\n how simple the algorithm is.\n\n- Finally, I introduce several real-world applications of Hidden Markov\n Models in machine learning. In this section, real-world\n considerations like feature extraction and training are discussed.\n\nBasic math knowledge is expected, just the ability to express concepts\nas equations and an understanding of Big-O notation. Basic Python\nknowledge is also expected, as code samples will be presented. 
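A compact, hedged sketch of the Viterbi recurrence this abstract describes, inferring the most likely hidden-state sequence (the toy weather model below is invented for illustration and is not the talk's own example):

::

    import numpy as np

    def viterbi(obs, start_p, trans_p, emit_p):
        # start_p: (S,), trans_p: (S, S), emit_p: (S, O); obs: list of symbol ids.
        T = len(obs)
        logv = np.log(start_p) + np.log(emit_p[:, obs[0]])  # best log-prob per state
        back = np.zeros((T, len(start_p)), dtype=int)       # backpointers
        for t in range(1, T):
            scores = logv[:, None] + np.log(trans_p)        # scores[i, j]: from i to j
            back[t] = scores.argmax(axis=0)
            logv = scores.max(axis=0) + np.log(emit_p[:, obs[t]])
        path = [int(logv.argmax())]                         # best final state
        for t in range(T - 1, 0, -1):                       # follow backpointers
            path.append(int(back[t][path[-1]]))
        return path[::-1]

    # Toy weather model: states 0=rainy, 1=sunny; observations 0=walk, 1=shop.
    print(viterbi([0, 1, 1],
                  start_p=np.array([0.6, 0.4]),
                  trans_p=np.array([[0.7, 0.3], [0.4, 0.6]]),
                  emit_p=np.array([[0.1, 0.9], [0.6, 0.4]])))

The dynamic-programming step is the max over predecessors at each time step, which is exactly the "non-local" bookkeeping the abstract attributes to dynamic programming.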
The goal\nis to build up intuition.\n\n`The content of this talk is available as an article on my personal\nblog. `__\n", "duration": 2365, "language": "eng", - "published_at": "2019-12-23T21:04:11.000Z", "recorded": "2019-12-05", "speakers": [ "Avik Das" diff --git a/pydata-la-2019/videos/ben-fowler-traditional-novel-feature-selection-approaches-pydata-la-2019.json b/pydata-la-2019/videos/ben-fowler-traditional-novel-feature-selection-approaches-pydata-la-2019.json index 13f76010d..50fe54c51 100644 --- a/pydata-la-2019/videos/ben-fowler-traditional-novel-feature-selection-approaches-pydata-la-2019.json +++ b/pydata-la-2019/videos/ben-fowler-traditional-novel-feature-selection-approaches-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Selecting the optimal set of features is a key step in the ML modeling\nprocess. This talk will present research that tested five\napproaches for feature selection. The approaches included current widely\nused methods, along with novel approaches for feature selection using\nopen-source libraries, building a classification model using the Lending\nClub dataset.\n\nA central component of the Machine Learning process is feature\nselection. Selecting the optimal set of features is important to\ngenerate a best fit model which generalizes to unseen data. A widely\nused approach for feature selection involves calculating Gini Importance\n(Gain) to identify the best set of features. However, recent work from\nScott Lundberg has found challenges with the consistency of the Gain\nattribution method. This talk will present results of model metrics on\nthe Lending Club dataset, testing five different feature selection\napproaches. The approaches tested involved widely used approaches\ncombined with novel approaches for feature selection.\n\nThrough the experimental design of the five feature selection approaches\nthat were tested, attendees will gain clarity on the impact of:\n\n- Data splitting method\n- Including relevant two-way and three-way interactions (xgbfir\n library)\n- Backwards stepwise feature selection as opposed to a singular feature\n selection step\n- Backwards stepwise feature selection using Shapley values (shap\n library).\n\nThe knowledge from this research can provide added predictive power and\nvelocity to the feature selection process for Data Scientists.\n", "duration": 1806, "language": "eng", - "published_at": "2019-12-23T21:03:35.000Z", "recorded": "2019-12-04", "speakers": [ "Ben Fowler" diff --git a/pydata-la-2019/videos/brad-rees-rapids-open-source-gpu-data-science-pydata-la-2019.json b/pydata-la-2019/videos/brad-rees-rapids-open-source-gpu-data-science-pydata-la-2019.json index dc4c95355..9fc46c671 100644 --- a/pydata-la-2019/videos/brad-rees-rapids-open-source-gpu-data-science-pydata-la-2019.json +++ b/pydata-la-2019/videos/brad-rees-rapids-open-source-gpu-data-science-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "See how RAPIDS and the open source ecosystem are advancing data science.\nIn this session, we will explore RAPIDS, the open source data science\nplatform from NVIDIA. Come learn how to get started leveraging these\nopen-source libraries for faster performance and easier development on\nGPUs. See the latest engineering work and new release features,\nincluding benchmarks and the software development roadmap.\n\nThe RAPIDS suite of open source software libraries gives the data\nscientist the freedom to execute end-to-end data science and analytics\npipelines on GPUs. 
RAPIDS is incubated by NVIDIA based on years of\naccelerated analytics experience. RAPIDS relies on NVIDIA CUDA\nprimitives for low-level compute optimization and exposes GPU\nparallelism and high-bandwidth memory speed through user-friendly Python\ninterfaces. Through a familiar DataFrame API that integrates with a\nvariety of machine learning algorithms, RAPIDS facilitates common data\npreparation tasks while removing typical serialization costs. RAPIDS\nincludes support for multi-GPU deployments, enabling vastly accelerated\nprocessing and training on large dataset sizes.\n\nJoin NVIDIA\u2019s engineers as they walk through a collection of data\nscience problems that introduce components and features of RAPIDS,\nincluding: feature engineering, data manipulation, statistical tasks,\nmachine learning, and graph analysis.\n", "duration": 2304, "language": "eng", - "published_at": "2019-12-23T21:04:17.000Z", "recorded": "2019-12-05", "speakers": [ "Brad Rees" diff --git a/pydata-la-2019/videos/carol-willing-keynote-pydata-la-2019.json b/pydata-la-2019/videos/carol-willing-keynote-pydata-la-2019.json index d3f6573dc..87d5a51cb 100644 --- a/pydata-la-2019/videos/carol-willing-keynote-pydata-la-2019.json +++ b/pydata-la-2019/videos/carol-willing-keynote-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "www.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 2232, "language": "eng", - "published_at": "2019-12-23T21:04:34.000Z", "recorded": "2019-12-05", "speakers": [ "Carol Willing" diff --git a/pydata-la-2019/videos/christopher-ariza-the-promise-of-hierarchical-indices-for-data-beyond-2-dimensions-pydata-la-2019.json b/pydata-la-2019/videos/christopher-ariza-the-promise-of-hierarchical-indices-for-data-beyond-2-dimensions-pydata-la-2019.json index d5df7c31b..60b477437 100644 --- a/pydata-la-2019/videos/christopher-ariza-the-promise-of-hierarchical-indices-for-data-beyond-2-dimensions-pydata-la-2019.json +++ b/pydata-la-2019/videos/christopher-ariza-the-promise-of-hierarchical-indices-for-data-beyond-2-dimensions-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Recent versions of Pandas have warned users of the imminent deprecation\nof the Panel, Pandas\u2019 namesake data structure for storing\nthree-dimensional data. 
This talk will examine the tradeoffs in\nperformance and interface between two types of Panel alternatives: using\nhierarchical indices in Pandas and StaticFrame, or using true\nn-dimensional arrays in NumPy or xarray.\n\nThis talk will aid those working in data science and related fields by\nexamining the tradeoffs between working with data in true\nmultidimensional data structures (i.e., NumPy and xarray) versus working\nwith hierarchical index implementations on one- or two-dimensional data.\n\nThe immediate point of departure is Pandas\u2019 imminent deprecation of the\nPanel: for Pandas users who have used the Panel, this talk will\nillustrate how to transition away from the Panel and the tradeoffs in\nPanel alternatives.\n\nThis talk will explain how hierarchical indices work by comparing two\nimplementations: the Pandas MultiIndex and the StaticFrame\nIndexHierarchy. The StaticFrame IndexHierarchy offers a new, independent\nimplementation of hierarchical indices that deviates from Pandas in\nsignificant ways: the index is literally composed of other index\nobjects, permitting usage of specialized index types (such as datetime\nindices), efficient memory usage of shared immutable objects, and the\nenforcement of a strict tree graph.\n\nAfter demonstrating how hierarchical indices can support higher\ndimensional data in one or two-dimensional arrays, the power and\nflexibility of selecting and slicing data with hierarchical indices will\nbe demonstrated.\n\nThe talk will close with performance analysis, isolating the overhead of\nusing hierarchical indices over true multi-dimensional array\nrepresentations, and comparing the performance of selection, slicing,\ngrouping, and function application of hierarchical data in NumPy,\nxarray, Pandas and StaticFrame.\n\nThis talk is aimed at both beginners, new to hierarchical indices, and\nmore advanced users interested in interface design and performance\ntradeoffs. Basic familiarity with NumPy and Pandas is expected. Audience\nmembers will leave with a better understanding of how hierarchical\nindices work, and what tradeoffs are made when using them.\n", "duration": 1914, "language": "eng", - "published_at": "2019-12-23T21:04:46.000Z", "recorded": "2019-12-05", "speakers": [ "Christopher Ariza" diff --git a/pydata-la-2019/videos/daniel-j-brooks-computer-vision-with-pytorch-pydata-la-2019.json b/pydata-la-2019/videos/daniel-j-brooks-computer-vision-with-pytorch-pydata-la-2019.json index a6e83f1e6..3ab5cb442 100644 --- a/pydata-la-2019/videos/daniel-j-brooks-computer-vision-with-pytorch-pydata-la-2019.json +++ b/pydata-la-2019/videos/daniel-j-brooks-computer-vision-with-pytorch-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Computer vision algorithms, which process video and image data, have\nmany applications. The development of convolutional neural networks,\nalongside hardware improvements, has led to a proliferation of highly\naccurate computer vision models. 
In this tutorial, you will learn how to\nbuild new, and use existing, computer vision models using PyTorch.\n\nThis tutorial is a practical, hands-on introduction to computer vision\nwith PyTorch.\n\n| In this tutorial, you will learn about:\n| - An overview of deep learning for computer vision\n| - How to implement neural networks in PyTorch\n\n| You will gain hands-on experience with important computer vision\n tasks:\n| - Image classification\n| - Object detection\n| - Semantic segmentation\n| - Generative models\n\nTutorial materials are available on GitHub in Jupyter notebook format.\n\nLaptops are encouraged, but not required.\n", "duration": 4007, "language": "eng", - "published_at": "2019-12-23T21:00:14.000Z", "recorded": "2019-12-03", "speakers": [ "Daniel J. Brooks" diff --git a/pydata-la-2019/videos/dante-gama-dessavre-open-source-is-better-together-gpu-python-libraries-unite-pydata-la-2019.json b/pydata-la-2019/videos/dante-gama-dessavre-open-source-is-better-together-gpu-python-libraries-unite-pydata-la-2019.json index 7cce8a24c..904512896 100644 --- a/pydata-la-2019/videos/dante-gama-dessavre-open-source-is-better-together-gpu-python-libraries-unite-pydata-la-2019.json +++ b/pydata-la-2019/videos/dante-gama-dessavre-open-source-is-better-together-gpu-python-libraries-unite-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Today, the computational limits of CPUs are being realized, and GPUs are\nbeing utilized to satisfy the compute demands of users. In the past,\nthis has meant low-level programming in C/C++, but today there is a rich\necosystem of open source software with Python APIs and interfaces. This\ntalk will highlight the journey of developing open source software on\ntop of and integrating with this ecosystem.\n\n1. PyData Ecosystem\n\n - Pandas, Numpy, SciPy, SKLearn, Dask, Cython, etc.\n - Highly interoperable with everything standardizing around Numpy /\n Pandas\n - Highly productive\n - Compute limited\n\n2. Apache Big Data Ecosystem\n\n - Spark, Beam, Flink, Hive, Impala, etc.\n - Semi interoperable but very technology dependent\n - Semi productive\n - Still compute limited\n\n3. GPUs\n\n - Thrust, CUB, NCCL, OpenUCX, etc.\n - Not very interoperable\n - Not productive\n - Not compute limited!\n\n4. Apache Arrow\n\n - Standards for memory layouts\n - Cross language compatible\n - Potential to bridge the PyData, Apache Big Data, and GPU\n ecosystems!\n\n5. RAPIDS\n\n - Combining the compute of GPUs with the productivity of the PyData\n ecosystem with the integration and interoperability of Apache\n Arrow\n - Built on top of OSS C/C++ GPU Ecosystem: Thrust, CUB, NCCL,\n OpenUCX\n - Integrated with OSS Python GPU Ecosystem: Numba, CuPy, PyTorch\n - Built on top of and integrated with OSS PyData Ecosystem: Pandas,\n Numpy, Dask, Cython\n\n6. Ecosystem Interoperability\n\n - Standards / Protocols\n - Numpy ``__array_function__`` protocol\n - ``__cuda_array_interface__`` protocol\n - DLPack\n - User Experience\n - Follow the same Python APIs that users are comfortable,\n productive, and happy with\n - Performance\n - Deliver 10-1000x the performance with nearly zero code change\n - Scaling\n - Scale the same way as existing PyData ecosystem with Dask\n - Improve Dask for everyone with lower level communication\n acceleration\n\n7. 
Struggles\n\n - CI\n - Travis-CI doesn\u2019t cut it for GPUs, and there is no easy-to-use\n off-the-shelf alternative\n - Programming Paradigm Mindset\n - Thinking in terms of vectorized operations instead of loops /\n iterations\n - Amdahl\u2019s Law\n - New bottlenecks that we didn\u2019t previously worry about\n\n8. Conclusion\n9. Q/A\n", "duration": 2529, "language": "eng", - "published_at": "2019-12-23T21:04:51.000Z", "recorded": "2019-12-05", "speakers": [ "Dante Gama Dessavre" diff --git a/pydata-la-2019/videos/dmitry-petrov-machine-learning-models-versioning-using-open-source-tools-pydata-la-2019.json b/pydata-la-2019/videos/dmitry-petrov-machine-learning-models-versioning-using-open-source-tools-pydata-la-2019.json index 65b3d399e..3dddcba13 100644 --- a/pydata-la-2019/videos/dmitry-petrov-machine-learning-models-versioning-using-open-source-tools-pydata-la-2019.json +++ b/pydata-la-2019/videos/dmitry-petrov-machine-learning-models-versioning-using-open-source-tools-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "AI and ML are becoming an essential part of software engineering. Open\nsource tools like Git, Git-LFS, MLflow can increase ML teams\u2019\nproductivity by introducing best practices. However, management and\nversioning of large datasets are not covered by these tools. We will show\nhow to overcome the limitations of the tools by using DVC.org - an\nopen-source project for ML models and datasets versioning.\n\nAI and ML are becoming an essential part of software engineering. The\ntraditional engineering toolset does not fully cover a machine learning\nteam's needs. The teams need new tools for data versioning, ML pipeline\nversioning, ML model versioning, experiment metrics tracking, and\nothers.\n\nML workflow is data-centric while software engineering workflow is\ncentered around source code. We will discuss the current practices of\norganizing ML projects using open-source tools like Git, Git-LFS, MLflow\nas well as their limitations. Thereby, the motivation for developing new\nML-specific data versioning systems will be explained.\n\nData Version Control or DVC.ORG is an open-source command-line tool. We\nwill show how to version ML models and multi-gigabyte datasets, how to\nuse your favorite cloud storage (S3, Google Cloud Storage, or bare-metal\nSSH server) as a data file backend, how to apply the best engineering\npractices to your ML projects, and how to combine the different tools in\nthe same project.\n", "duration": 1931, "language": "eng", - "published_at": "2019-12-23T21:04:57.000Z", "recorded": "2019-12-05", "speakers": [ "Dmitry Petrov" diff --git a/pydata-la-2019/videos/eric-busboom-tackling-homelessness-with-open-data-pydata-la-2019.json b/pydata-la-2019/videos/eric-busboom-tackling-homelessness-with-open-data-pydata-la-2019.json index 6bb366881..0bd49582a 100644 --- a/pydata-la-2019/videos/eric-busboom-tackling-homelessness-with-open-data-pydata-la-2019.json +++ b/pydata-la-2019/videos/eric-busboom-tackling-homelessness-with-open-data-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Volunteer data scientists have converted 5 years of data about sheltered\nhomeless people in Downtown San Diego from hand-marked paper maps to a\ndetailed geographic dataset, using a combination of machine vision\ntools, Python programs and manual work. 
This talk will review how we\ncreated the dataset and how we are working with government and\nuniversities to use the dataset to inform social policy.\n\nFor the last 7 years, the `Downtown San Diego\nPartnership `__ has been conducting\nmonthly counts of homeless people in the Downtown neighborhood. The data is\nrecorded on paper maps, which are `compiled into a\nspreadsheet. `__\nThis is a fantastic dataset which would be even more useful if the\nhand-recorded paper maps were digitized. This project involves\ncollecting and digitizing 5 years of the monthly maps, to produce a\ngeographic dataset, which we will analyze for time trends and for the\nassociation between homeless movements, geography and the built\nenvironment.\n\nThis talk is suitable for people with an interest in using data to solve\nsocial issues at any level of skill.\n\nThis talk will:\n\n- Describe the scope of the homeless problem in San Diego and present\n the opportunities for data science to inform homeless policy.\n- Detail the process we used to convert the hand-marked maps, both the\n manual process and use of machine vision\n- Show the analysis of the final dataset, with specific emphasis on\n geographic analysis using GeoPandas and Jupyter.\n- Demonstrate the data management process, using Metatab to package\n data and publish it to WordPress\n- Present how we are training volunteer analysts in the PyData tools to\n answer data questions posed by researchers.\n\nAttendees will learn key aspects of an important social problem\nand how to use data to solve these problems using a range of Python\ntools.\n", "duration": 2501, "language": "eng", - "published_at": "2019-12-23T21:05:03.000Z", "recorded": "2019-12-05", "speakers": [ "Eric Busboom" diff --git a/pydata-la-2019/videos/fletcher-riehl-using-embedding-layers-to-manage-high-cardinality-categorical-data-pydata-la-2019.json b/pydata-la-2019/videos/fletcher-riehl-using-embedding-layers-to-manage-high-cardinality-categorical-data-pydata-la-2019.json index 44602abd5..d6a3eb3bd 100644 --- a/pydata-la-2019/videos/fletcher-riehl-using-embedding-layers-to-manage-high-cardinality-categorical-data-pydata-la-2019.json +++ b/pydata-la-2019/videos/fletcher-riehl-using-embedding-layers-to-manage-high-cardinality-categorical-data-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "At System1, data scientists are faced with the task of predicting\nrevenue and cost per click across millions of unique keywords that drive\ntraffic to our sites or monetize on a pay-per-click basis. This talk\nwill show a variety of techniques we use to extract the most information\nfrom categorical variables, especially anonymized, sparse,\nhigh-cardinality categorical variables, like search terms.\n\nCategorical variables are easily interpretable by data scientists and\nnon-technical people, but they can also be difficult to translate into\nmachine learning algorithms. Categorical variables need to be converted\nto quantitative values to be used in machine learning models and can\nvery quickly explode the feature space of a model, add noise or\nunintended signals to the data, or simply not include all the meaning\nand predictive power that feature provides for the dependent variable.\nThere are many popular and effective libraries that abstract categorical\nvariable feature creation. 
However, if a model is sensitive from a\nfinancial, data ethics, or some level of public visibility standpoint,\nor simply prone to overfitting, it is vital to understand how the model\nis capturing all features and how to tune model parameters or input\ndata. Furthermore, if dealing with personal or sensitive data, machines\nneed to be able to handle anonymized categories while still allowing a\nhuman to interpret the source data. One of the problems we face at\nSystem1 is that individual keywords can receive very little traffic,\nsometimes less than a click per day; however, across millions of\nkeywords, these long-tail keywords comprise significant revenue.\nFurthermore, data science models need to be proactive and adjust bids\nand traffic based on seasonal components even if there is no data from\nthe prior season. This talk will present a variety of practical\ntechniques to extract and retain information and predictive power for\ncategorical variables. We will talk about model selection, feature\ncreation, and techniques for converting categorical variables to\nquantitative values for modeling. Finally, the talk will present an\ninteresting technique that utilizes embedding layers and transfer\nlearning in a neural network framework to predict cost per click values\non search terms.\n", "duration": 2111, "language": "eng", - "published_at": "2019-12-29T22:30:13.000Z", "recorded": "2019-12-05", "speakers": [ "Fletcher Riehl" diff --git a/pydata-la-2019/videos/franklin-sarkett-building-a-data-driven-organization-pydata-la-2019.json b/pydata-la-2019/videos/franklin-sarkett-building-a-data-driven-organization-pydata-la-2019.json index 96c8b399e..c822cfe58 100644 --- a/pydata-la-2019/videos/franklin-sarkett-building-a-data-driven-organization-pydata-la-2019.json +++ b/pydata-la-2019/videos/franklin-sarkett-building-a-data-driven-organization-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Buckminster Fuller said, \u201cIf you want to teach people a new way of\nthinking, don\u2019t bother trying to teach them. Instead, give them a tool,\nthe use of which will lead to new ways of thinking.\u201d This profound\ninsight shaped the way I build my organization. We will look at some\ntools that enable us to think like a data-driven organization.\n\nBuckminster Fuller said, \u201cIf you want to teach people a new way of\nthinking, don\u2019t bother trying to teach them. Instead, give them a tool,\nthe use of which will lead to new ways of thinking.\u201d This profound\ninsight shaped the way I build my organization. We will look at some of\nthe tools that enable us to think like a data-driven organization.\n\nTeams are built out of three things: \\* People \\* Processes \\* Resources\n\nThere is a three-step process we can deploy. We start by focusing on\nresources, then processes, and lastly people. With each step we climb\nthe ladder of value.\n\nWith resources, we have tools. By choosing specific tools, we can begin\nto shape our thinking. We can create mental pathways that intentionally\ntake us in a particular direction.\n\nAfter we establish the tools, we focus on our processes. These are our\nworkflows and in many ways, our philosophies. How we think things should\nproceed. This could be kanban and how we move work from left to right\nthrough the pipeline, and it can be kaizen and belief in small,\ncontinuous improvement. Processes can be considered tools as much as\nresources.\n\nOnce we have resources and processes, we can free up bandwidth with our\npeople. 
People are most creative and dynamic at this stage once friction\nis removed and adding business value can go from one-off fixes to\nbecoming part of the organization itself.\n\nTools that facilitate a data-driven culture: \\* Mental models \\* Kanban\nand Kaizen \\* Opinionated, structured data science like Kedro \\* OKRs \\*\nKPIs\n", "duration": 2523, "language": "eng", - "published_at": "2019-12-23T21:02:50.000Z", "recorded": "2019-12-04", "speakers": [ "Franklin Sarkett" diff --git a/pydata-la-2019/videos/franklin-velasquez-introduction-to-h20-automl-with-python-pydata-la-2019.json b/pydata-la-2019/videos/franklin-velasquez-introduction-to-h20-automl-with-python-pydata-la-2019.json index bb9b5fb5c..42bfd88ff 100644 --- a/pydata-la-2019/videos/franklin-velasquez-introduction-to-h20-automl-with-python-pydata-la-2019.json +++ b/pydata-la-2019/videos/franklin-velasquez-introduction-to-h20-automl-with-python-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "In this tutorial, we intend to do automated modeling on a subset of the\nloan-level data from Fannie Mae and Freddie Mac using H2O's automated\nalgorithm (AutoML). We will solve a binary classification problem\n(predicting if a loan is delinquent or not). Also, we will explore a\nregression use-case (predicting interest rates on the same dataset). We\nwill be using the h2o Python module in JupyterLab.\n\nChoosing the best machine learning models and tuning them can be\ntime-consuming and exhausting. Often, it requires levels of expertise to know\nwhat parameters to tune. The field of Automated Machine Learning\n(AutoML) focuses on solving this issue. AutoML is useful both for\nexperts, by automating the process of choosing and tuning a model; and\nfor non-experts as well, by helping them to create high-performing\nmodels in a short time frame. H2O is an open-source, distributed machine\nlearning platform with APIs in Python, R, Java, and Scala. H2O AutoML is\nan algorithm for automating the machine learning workflow,\nwhich includes automatic training, hyper-parameter optimization, model\nsearch and selection under time, space, and resource constraints. 
H2O's\nAutoML further optimizes model performance by stacking an ensemble of\nmodels.\n\nReferences\n~~~~~~~~~~\n\n- `H2O\n AutoML `__\n\n- `An Open Source AutoML\n Benchmark `__\n\nPrerequisites:\n~~~~~~~~~~~~~~\n\n- Basic knowledge of Machine Learning\n\n- Familiarity with Python\n\n- JupyterLab\n\n- H2O installed on local machine or cloud environment\n\n - Quick H2O installation (requires Java and h2o Python module)\n\nOutline:\n~~~~~~~~\n\n- Task 0: Introduction to Automatic Machine Learning, H2O and H2O\n AutoML (15 min)\n- Task 1: Importing libraries, initializing H2O, importing data (5 min)\n- Task 2: Data Preparation and Transformations (5 min)\n- Task 3: H2O AutoML Classification and Model Evaluation\n (Interpretation) (15 min)\n- Task 4: H2O AutoML Regression and Model Evaluation (Interpretation)\n (15 min)\n- Task 5: H2O AutoML Classification in Flow (10 min)\n- Task 6: H2O AutoML Regression in Flow (15 min)\n- Task 7: Q&A (10 min)\n", "duration": 4681, "language": "eng", - "published_at": "2019-12-24T04:05:00.000Z", "recorded": "2019-12-03", "speakers": [ "Franklin Velasquez" diff --git a/pydata-la-2019/videos/hao-jin-accelerate-numpy-data-science-workloads-and-deep-learning-applications-pydata-la-2019.json b/pydata-la-2019/videos/hao-jin-accelerate-numpy-data-science-workloads-and-deep-learning-applications-pydata-la-2019.json index 18e03d54a..6731e82e2 100644 --- a/pydata-la-2019/videos/hao-jin-accelerate-numpy-data-science-workloads-and-deep-learning-applications-pydata-la-2019.json +++ b/pydata-la-2019/videos/hao-jin-accelerate-numpy-data-science-workloads-and-deep-learning-applications-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "In this talk, we\u2019ll cover creation of a multilayer perceptron model\nusing gluon and MXNet\u2019s new NumPy-compatible functions, a port of the\nclassic NumPy with GPU accelerations and additional features for deep\nlearning.\n\nAll the code snippets shown during the talk are available at\nhttps://github.com/haojin2/PyData-LA-Demo\n\nDiving into deep learning requires understanding bulky new frameworks,\nwhich significantly increases the adoption curve for data scientists in\nindustry. In this talk, we\u2019ll cover creation of a multilayer perceptron\nmodel using gluon and MXNet\u2019s new NumPy-compatible functions, a port of\nthe classic NumPy with GPU accelerations and additional features for\ndeep learning. These open source tools will give you a working\nfoundation for building out more complicated models for real\napplications with faster performance and less hassle.\n", "duration": 1726, "language": "eng", - "published_at": "2019-12-23T21:05:11.000Z", "recorded": "2019-12-05", "speakers": [ "Hao Jin" diff --git a/pydata-la-2019/videos/hareem-naveed-write-the-docs-pydata-la-2019.json b/pydata-la-2019/videos/hareem-naveed-write-the-docs-pydata-la-2019.json index 32b6c9776..065b28ab7 100644 --- a/pydata-la-2019/videos/hareem-naveed-write-the-docs-pydata-la-2019.json +++ b/pydata-la-2019/videos/hareem-naveed-write-the-docs-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "This talk will provide an in-depth overview of Sphinx and how it can be\nused to generate intelligent and readable documentation of your Python\ncode.\n\nDocumenting code should be the easiest part, yet it is often cited as\nthe hardest part of developing a data science product. In this talk we\nwill introduce Sphinx, an open-source tool that allows you to template\ndocumentation for any project. 
We will give an overview of how Sphinx\nworks, and walk through:\n\n- Examples of documentation generated with Sphinx\n\n- reStructuredText vs Markdown\n\n- How does Docutils work at the individual file level\n\n- How does Sphinx tie it all together\n\n- Best practices for setting up your documentation\n\nThis talk is for anyone who has ever wondered how all the Read the Docs\nare generated!\n", "duration": 2000, "language": "eng", - "published_at": "2019-12-23T21:02:56.000Z", "recorded": "2019-12-04", "speakers": [ "Hareem Naveed" diff --git a/pydata-la-2019/videos/hayley-song-experimental-machine-learning-with-holoviz-and-pytorch-in-jupeyterlab-pydata-la-2019.json b/pydata-la-2019/videos/hayley-song-experimental-machine-learning-with-holoviz-and-pytorch-in-jupeyterlab-pydata-la-2019.json index 197d0e52e..bc46668a9 100644 --- a/pydata-la-2019/videos/hayley-song-experimental-machine-learning-with-holoviz-and-pytorch-in-jupeyterlab-pydata-la-2019.json +++ b/pydata-la-2019/videos/hayley-song-experimental-machine-learning-with-holoviz-and-pytorch-in-jupeyterlab-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "This tutorial introduces how to make your data exploration and neural\nnetwork training process more interactive and exploratory by using the\ncombination of JupyterLab, HoloViews, and PyTorch. I will first\nintroduce the basic concepts behind HoloViews, and walk through how to\nembellish each step of your machine learning workflow with HoloViews to\nemphasize the experimental nature of modeling.\n\n**Update** : Please visit `this\nrepo `__ for tutorial\nmaterials\n\n- Subtitle: A guide through multi-class road detection on satellite\n images with interactive visualization and explorative model building\n- Author: Hayley Song (`[email\n protected] `__)\n- Category: step-by-step tutorial\n- Prereq:\n\n - Basic understanding of visualization with Python (eg. previously\n have used the matplotlib.pyplot library)\n - | Basic understanding of neural network training process\n | I'll give a brief overview of the workflow, assuming audiences'\n previous experience with the following concepts\n\n - mini-batch training\n - forward-pass, backward-pass\n - gradient, gradient descent algorithm\n - classification, semantic segmentation\n - image as numpy ndarray\n\n- Material distribution\n\n - All materials needed to follow the tutorial will be shared in a\n self-contained GitHub repo, as well as a Binder environment\n - **Update** : Please visit `this\n repo `__ for tutorial\n materials\n - Links to extra resources will be provided as appropriate\n\nOverview\n--------\n\nThis tutorial introduces how to make your data exploration and model\nbuilding process more interactive and exploratory by using the\ncombination of JupyterLab, HoloViews, and PyTorch.\n`HoloViews `__ is a set of Python libraries that\noffers simple yet powerful visualization and GUI building tools which,\ntogether with other data analysis libraries (eg. ``pandas``,\n``geopandas``, ``numpy``) and machine learning frameworks (eg.\n``PyTorch``, ``Tensorflow``) can make your modeling procedure more\ninteractive and exploratory. I will start by introducing four core\nHoloViews libraries (Holoviews, GeoViews, Panel and Param) and\ndemonstrate basic examples of how we can essentially replace any\n\"Matplotlib.pyplot\" calls with equivalents in ``HoloViews``. You will\nsee how this opens up the possibilities to directly interact with your\nvisualization by eg. 
hovering over the graph to inspect values, querying\nRGB values of an image, or Lat/Lon values on your map.\n\nFollowing the introduction of the HoloViews libraries, I will\ndemonstrate how to embellish each step of your machine learning workflow\nwith HoloViews. First, you will learn to easily turn your PyTorch code\ninto a simple GUI that encapsulates the state of your model (or\nalternatively, the state of your training session). This GUI explicitly\nexposes your model parameters and training hyperparameters (eg. learning\nrate, optimizer settings, batch size) as directly tunable parameters.\nCompared to conventional ways of specifying the hyperparameter settings\nwith the help of the 'argparse' library or config files, this GUI approach\nfocuses on the experimental nature of modeling and integrates seamlessly\nwith Jupyter notebooks. After training a neural network model using our\nown GUI in the notebook, I will demonstrate how to understand the model\nby visualizing the intermediate layers with HoloViews and test the model\nwith test images directly sampled from HoloViews visualization.\n\nTo illustrate these steps, I will focus on the problem of classifying\ndifferent types of roads on satellite images, defined as a multi-class\nsemantic segmentation problem. Starting from the data exploration to the\ntrained model understanding, you will learn different ways to explore\nthe data and models by easily building simple GUIs in a Jupyter\nnotebook.\n\nIn summary, by the end of the talk you will have learned: - how to make\nyour data exploration more intuitive and experimental using HoloViews\nlibraries - how to turn your model script into a simple GUI that allows\ninteractive hyperparameter tuning and model exploration - how to monitor\nthe training process in real time - how to quickly build a GUI tool to\ninspect the trained models in the same Jupyter notebook\n\nThe provided example code will be a great starting point for\nexperimenting with these tools on your own datasets and tasks.\n\nOutline\n-------\n\nThis tutorial will consist of five main sections. I will first\nintroduce the basic concepts behind ``Holoviews/Geoviews`` and ``Panel``\nwhich are the main libraries we are going to use to add interactive\nexploration tools for data exploration and model training/evaluation,\nall in a single Jupyter notebook. This will take ~15 minutes. The rest\nof the tutorial will flow in the order of the general neural network\ntraining workflow, while integrating these libraries at each step. I\nwill leave the last <10 minutes for questions.\n\n- Step 0: Introduction to ``Holoviews``/``Geoviews`` and ``Panel``\n [15mins]\n- Step 1: Explore your dataset with ``Holoviews``/``Geoviews`` [15mins]\n- Step 2: Build an easily-configurable neural network model with\n ``param`` [15mins]\n- Step 3: Monitor your training process through an interactive GUI\n [15mins]\n- Step 4: Analyze your learned model on new images + Understand what\n your model has learned by looking at intermediate feature maps with\n ``Holoviews`` and ``Panel`` [15mins]\n- Q/A [5~10 mins]\n\nStep 0: Introduction to ``HoloViews`` libraries\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nIn this introductory section, I will go over the basic concepts behind\nthe ``HoloViews`` libraries. 
I will provide simple examples that show\nhow we can replace any ``Matplotlib`` plot calls with equivalent calls\nin ``Holoviews/Geoviews`` with no hassle, and build easy tools to\ninteract with your data.\n\nStep 1: Explore your dataset\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nThe first step in building a machine learning model is to understand\nyour dataset. For the scope of this tutorial (ie. semantic segmentation\nof road types from satellite images), we will use the SpaceNet datasets.\nMore details on how to get the data as well as how the data are\ncollected and annotated can be found\n`here `__.\nThe original dataset is very large (>100GB) and requires a lot of\npreprocessing to be useful for training. For example, the RGB images are\n16-bit, of size 1300x1300, and the \"target\" roads are vector lines (as\nopposed to raster images), which means they need to be rasterized. I\nhave prepared a smaller sample dataset consisting of the RGB images\nconverted to 8-bit and cropped to 520x520, as well as road buffers\nas rasters which can be easily used as the target images. I will share\nthe dataset to accompany my tutorial. The shared dataset will consist\nof input RGB images and target mask images. Each pixel of a target image\nwill contain one of the labels in {'highway', 'track', 'dirt', 'others'}\n(as ``uint8``).\n\nThe focus of this section is to show how to build a GUI-like\nvisualization of a satellite dataset within a Jupyter notebook using\n``Holoviews``/``Geoviews``. See Figure 1 (in the shared Google Drive)\nfor an example. Unlike a static plot (eg. one that is generated from\nMatplotlib), one can hover over the ``Holoviews`` plot to inspect the\nlabels at each pixel of the mask image or to check the lat/lon\nlocations. Furthermore, I will show how you can trigger more complicated\ncomputations (eg. compute road length within a selected zone), while\ninteracting with the plot directly, eg. selecting a region by mouse\ndrag, clicking a lat/lon by mouse click.\n\nThe second example will show how this interactive plot can be extended to\nincorporate external information (eg. roadlines from OpenStreetMap) to\neasily compare with your own dataset. See Figure 2 (in the shared Google\nDrive) for a snapshot of such a tool. In this example, as you select\ndifferent RGB filenames (of your dataset), you have an option to click\non 'click to download OSM' to download the corresponding region's\nOSM road data, and visualize it as an interactive map.\n\nStep 2: Monitor the training process\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nIn this section, I will show how to wrap a ``PyTorch`` NN model\nwith ``param``'s \`Parameterized' class to expose its hyperparameters as\ntunable parameters. Using the GUI representation of the NN model, we can\ncontrol the (hyper)parameter configurations more intuitively, and study\ntheir effects. 
Its seamless integration into a Jupyter notebook\nfacilitates the experimental side of the machine learning training\nprocess.\n\nStep 3: Interactively test your trained model on the new data\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nStep 4: Understand what the model has learned\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n\n\nI will conclude the tutorial by summarizing the main takeaways and\nproviding pointers to useful resources:\n\n- General\n\n - Github repo for this talk\n - Link to HoloViews libraries\n - more: DataShader\n - PyTorch, torchvision\n\n- Geospatial Data\n\n - remote sensing data: google-earth-engine\n - libraries: xarray, dash, rasterio, geopandas\n", "duration": 4391, "language": "eng", - "published_at": "2019-12-23T21:02:16.000Z", "recorded": "2019-12-03", "speakers": [ "Hayley Song" diff --git a/pydata-la-2019/videos/ivona-tautkute-ai-and-fashion-product-retrieval-with-multi-modally-generated-data-pydata-la-2019.json b/pydata-la-2019/videos/ivona-tautkute-ai-and-fashion-product-retrieval-with-multi-modally-generated-data-pydata-la-2019.json index 74167b985..724a727de 100644 --- a/pydata-la-2019/videos/ivona-tautkute-ai-and-fashion-product-retrieval-with-multi-modally-generated-data-pydata-la-2019.json +++ b/pydata-la-2019/videos/ivona-tautkute-ai-and-fashion-product-retrieval-with-multi-modally-generated-data-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "The talk will cover generative modeling for multimodal input (image and\ntext) in the context of product retrieval in fashion/e-commerce.\n\nThe presentation will include examples of applying generative (GAN)\narchitectures for image generation with multimodal query using models\nderived from Conditional GAN, StackGAN, AttnGAN and others.\n\nRetrieving products from large databases and finding items of particular\ninterest for the user is a topic of ongoing research. Moving further\nfrom text search, tag-based search and image search, there is still a\nlot of ambiguity when visual and textual features need to be merged.\nText query might complement an image (\"I want sport shoes like these in\nthe image, produced by XXX, wide fit and comfortable\") or might\nrepresent a difference from image query (\"I want a dress like that in\nthe picture, only with shorter sleeves\").\n\nTalk outline:\n\n- Use cases in e-commerce and fashion\n- Current methods for learning multimodal embedding (VSE, Multimodal\n Siamese Networks)\n- Intro to GAN architectures that take latent representation as an\n input (we can influence what we generate, yeah!)\n- How do you feed multimodal input into GAN\n- Results and comparison\n", "duration": 1494, "language": "eng", - "published_at": "2019-12-24T02:59:41.000Z", "recorded": "2019-12-05", "speakers": [ "Ivona Tautkute" diff --git a/pydata-la-2019/videos/james-powell-what-you-got-is-what-you-got-pydata-la-2019.json b/pydata-la-2019/videos/james-powell-what-you-got-is-what-you-got-pydata-la-2019.json index 7bd081390..0c36e6e1c 100644 --- a/pydata-la-2019/videos/james-powell-what-you-got-is-what-you-got-pydata-la-2019.json +++ b/pydata-la-2019/videos/james-powell-what-you-got-is-what-you-got-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Composition, inheritance, restricted computation domains, boxed versus\nunboxed, and the search for a perfect proxy. But, folks, what you got is\nwhat you got.\n\nComposition, inheritance, restricted computation domains, boxed versus\nunboxed, and the search for a perfect proxy. 
But, folks, what you got is\nwhat you got.\n", "duration": 2121, "language": "eng", - "published_at": "2019-12-24T03:39:25.000Z", "recorded": "2019-12-05", "speakers": [ "James Powell" diff --git a/pydata-la-2019/videos/jeffrey-mew-build-an-ai-powered-pet-detector-in-visual-studio-code-pydata-la-2019.json b/pydata-la-2019/videos/jeffrey-mew-build-an-ai-powered-pet-detector-in-visual-studio-code-pydata-la-2019.json index 1f09dc3e7..afa683db0 100644 --- a/pydata-la-2019/videos/jeffrey-mew-build-an-ai-powered-pet-detector-in-visual-studio-code-pydata-la-2019.json +++ b/pydata-la-2019/videos/jeffrey-mew-build-an-ai-powered-pet-detector-in-visual-studio-code-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Ever wondered what breed that dog or cat is? Let\u2019s build a pet detector\nservice to recognize them in pictures!\n\nEver wondered what breed that dog or cat is? Let\u2019s build a pet detector\nservice to recognize them in pictures! In this talk, we will walk\nthrough the training, optimizing, and deploying of a deep learning model\nby using VS Code and the Azure Machine Learning service. We will use\ntransfer learning to recognize dog and cat breeds. Next, we\u2019ll optimize\nthe model using Azure Machine Learning service to improve the model\naccuracy. Putting on our developer hat, we'll then refactor the\nnotebooks into Python modules using VS Code. Finally, we will deploy the\nmodel as a web service in Azure, all from the comforts of VS Code. Come\nto see how Azure and VS Code make AI and machine learning development\nand deployment easy.\n", "duration": 2039, "language": "eng", - "published_at": "2019-12-24T02:48:18.000Z", "recorded": "2019-12-05", "speakers": [ "Jeffrey Mew" diff --git a/pydata-la-2019/videos/john-healy-map-all-the-things-pydata-la-2019.json b/pydata-la-2019/videos/john-healy-map-all-the-things-pydata-la-2019.json index f697e5f35..3d19dfa92 100644 --- a/pydata-la-2019/videos/john-healy-map-all-the-things-pydata-la-2019.json +++ b/pydata-la-2019/videos/john-healy-map-all-the-things-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Embedding techniques like word2vec and doc2vec are taking over the\nworld. An up-and-coming technique for embedding numeric data is UMAP.\nHow would you go about applying UMAP to real-world data? How about text\ndata? What about malware? In this talk we\u2019ll learn how to MAP all the\nthings!\n\nEmbedding techniques are taking over the world. From word2vec to embed\nwords, all the way to Latent Dirichlet Allocation and doc2vec to embed\ndocuments. All these techniques are really about turning non-numeric\ndata into vector space data suitable for either machine learning or\nvisualization. An up-and-coming technique for embedding numeric data is\n`UMAP `__. How would you go about\napplying UMAP to real-world data? How about text data? What about malware? In\nthis talk we\u2019ll learn how to MAP all the things!\n\nWe\u2019ll introduce you to a new technique called WordMAP for generating\nvery low-dimensional word embeddings by making use of UMAP. With this\ntechnique in hand, one can generalize to a document embedding algorithm\nwe're calling DocMAP. This approach ultimately only requires sequences\nof tokens and thus can apply to much broader classes of problems. 
We\u2019ll\ndemonstrate this by applying a variation of DocMAP to the problem of\nmapping the space of malware based on its behaviour.\n\nWhile the math behind UMAP might be challenging to some, this talk will\nfocus more on how to apply it in novel situations and take a more\npractical approach to things. If you have problems that can fit in this\nframework, you should come and learn how to MAP all the things!\n", "duration": 2908, "language": "eng", - "published_at": "2019-12-24T02:48:25.000Z", "recorded": "2019-12-05", "speakers": [ "John Healy" diff --git a/pydata-la-2019/videos/john-mount-nina-zumel-preparing-messy-data-for-supervised-pydata-la-2019.json b/pydata-la-2019/videos/john-mount-nina-zumel-preparing-messy-data-for-supervised-pydata-la-2019.json index 0ccf0497d..1b4a68aef 100644 --- a/pydata-la-2019/videos/john-mount-nina-zumel-preparing-messy-data-for-supervised-pydata-la-2019.json +++ b/pydata-la-2019/videos/john-mount-nina-zumel-preparing-messy-data-for-supervised-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Cleaning messy data is a necessary component of data science projects.\nThe vtreat package automates common data preparation steps for\nsupervised machine learning. In this talk, we will introduce vtreat and\ndemonstrate its effective use with Pandas and xgboost on real-world\ndata.\n\nData characterization, treatment, and cleaning are necessary (though not\nalways glamorous) components of machine learning and data science\nprojects. While there is no substitute for getting your hands dirty in\nthe data, there are many data issues that repeat from project to\nproject. In particular, there are pitfalls in properly dealing with\nmissing data values, previously unobserved categorical values, and\nhigh-cardinality categorical variables.\n\nIn this talk, we will discuss using the vtreat package to prepare data\nfor supervised machine learning. We will demonstrate vtreat on a\nreal-world data set, with xgboost and Pandas. Vtreat automates the\nstatistically sound treatment of common data problems, leaving the data\nscientist free to concentrate on problem-specific data and modeling\nissues.\n", "duration": 1938, "language": "eng", - "published_at": "2019-12-23T21:05:16.000Z", "recorded": "2019-12-05", "speakers": [ "John Mount", diff --git a/pydata-la-2019/videos/joseph-kearney-shahid-barkat-a-python-package-for-grappling-with-missing-data-pydata-la-2019.json b/pydata-la-2019/videos/joseph-kearney-shahid-barkat-a-python-package-for-grappling-with-missing-data-pydata-la-2019.json index f4d6758df..4478739df 100644 --- a/pydata-la-2019/videos/joseph-kearney-shahid-barkat-a-python-package-for-grappling-with-missing-data-pydata-la-2019.json +++ b/pydata-la-2019/videos/joseph-kearney-shahid-barkat-a-python-package-for-grappling-with-missing-data-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Real-world data is messy and missing, yet most statistical models\nrequire it to be clean and complete. Analysts are often well versed in\nmodeling, but few are familiar with handling missingness. This talk\nteaches data professionals best practices for dealing with missingness\nand introduces Autoimpute, our Python package that helps users grapple\nwith missing data during statistical analysis.\n\nMost real-world datasets contain missing data, but many statistical\nmodels expect input datasets to be complete. 
This disconnect requires\nanalysts to figure out what to do about missing data before they can\nproceed with statistical analysis.\n\nUnfortunately, most aspiring data professionals spend the bulk of their\ntime studying statistical models themselves, not techniques to handle\nmissing data. As a result, individuals opt for simple methods such as\nlistwise deletion or mean imputation and underestimate the impact these\nmethods have on parameter inference of statistical models.\n\nThis problem inspired us to create Autoimpute, a Python package that\noffers a framework for properly handling missing data during end-to-end\nanalysis. In this talk, we focus on handling missing data during\nregression analysis, and we demonstrate how to use Autoimpute to tackle\nthe problem methodologically.\n\nTo start, we provide context to understand different types of missing\ndata, and we define terminology used in the remainder of the talk. We\nthen walk through examples that contain different types of missingness.\nEach example uses a four-step methodology we developed to perform\nstatistical analysis with missing data. We start by assessing the extent\nof the missing data problem using descriptive and visual measures. We\nend by measuring the impact of imputation on the bias and variance of\nparameters derived from regression models built on imputed data.\n\nBy the end of the talk, each listener should leave equipped with a\nmethodological approach to handling missing data during statistical\nanalysis. Additionally, the audience should feel comfortable using the\nAutoimpute package as a tool in their Python data analysis ecosystem.\n", "duration": 2826, "language": "eng", - "published_at": "2019-12-23T21:03:06.000Z", "recorded": "2019-12-04", "speakers": [ "Joseph Kearney", diff --git a/pydata-la-2019/videos/juan-s-vasquez-web-scraping-with-beautifulsoup-yelps-api-pydata-la-2019.json b/pydata-la-2019/videos/juan-s-vasquez-web-scraping-with-beautifulsoup-yelps-api-pydata-la-2019.json index f9f3e6d83..4664014ee 100644 --- a/pydata-la-2019/videos/juan-s-vasquez-web-scraping-with-beautifulsoup-yelps-api-pydata-la-2019.json +++ b/pydata-la-2019/videos/juan-s-vasquez-web-scraping-with-beautifulsoup-yelps-api-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "In this tutorial, we will explore web scraping basics using requests,\nBeautifulSoup, and the Yelp API. The tutorial will be split up into two\nuse cases, the first scraping a business directory and the second Yelp\nbusiness listings. This hands-on tutorial will be carried out in a\nJupyter Notebook.\n\nParticipants will need to create a Yelp Developer Account.\n\nIn this web scraping tutorial, we will cover the following:\n\n1. Why data enrichment and web scraping can be helpful - operational\n insights, resource allocation, business intelligence\n\n2. Core puzzle pieces that work together to scrape - requests,\n BeautifulSoup, pandas, for loops, f-strings\n\n3. Yellow Pages Web Scrape with BeautifulSoup: Scrape a top category of\n the Yellow Pages, turn into df and export as csv\n\n4. Yelp Scrape using Yelp Fusion API: Scrape Yelp listings around\n specific locations\n\n5. 
Summarize & Debrief\n", "duration": 4045, "language": "eng", - "published_at": "2019-12-23T21:02:23.000Z", "recorded": "2019-12-03", "speakers": [ "Juan S Vasquez" diff --git a/pydata-la-2019/videos/kevin-chrzanowski-bokeh-maps-interactive-map-for-your-next-web-application-pydata-la-2019.json b/pydata-la-2019/videos/kevin-chrzanowski-bokeh-maps-interactive-map-for-your-next-web-application-pydata-la-2019.json index 17138e37e..97f0cd9d6 100644 --- a/pydata-la-2019/videos/kevin-chrzanowski-bokeh-maps-interactive-map-for-your-next-web-application-pydata-la-2019.json +++ b/pydata-la-2019/videos/kevin-chrzanowski-bokeh-maps-interactive-map-for-your-next-web-application-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Data scientists, analysts, and developers are proficient in creating\nstatic maps, but what about interactive ones? An interactive map of Los\nAngeles neighborhoods will be made via Python\u2019s Bokeh library to show\nhow quickly one can be built.\n\nMaps are crucial for showing differences between regions. However, what\nhappens when users want to see regional changes over a range of time\nperiods for a variety of variables? One map may not be enough. Rather\nthan creating many maps for all your users\u2019 needs, an interactive map\nallows the viewer to choose the time period and variables of interest.\nIn order to get a better sense of the process of making an interactive\nmap, a map of Los Angeles neighborhoods will be built live using\nPython\u2019s Bokeh library.\n\nThe presentation will cover how to add features and widgets such as a\nhovertool, selection form, and mouse selection click to a map. We will\nalso go over the important modify\\_doc and update functions that are\ncrucial for an interactive map to be able to respond to input changes\nfrom a viewer. We\u2019ll deploy to a Google Cloud server, discuss when to\nbuild an interactive map, and best practices for designing one.\n\nKnowledge of certain data visualization Python libraries such as\nmatplotlib or seaborn, in addition to basic Python functionality, will\nbe assumed. This is a presentation geared toward intermediate Python users.\n", "duration": 2133, "language": "eng", - "published_at": "2019-12-23T21:03:30.000Z", "recorded": "2019-12-04", "speakers": [ "Kevin Chrzanowski" diff --git a/pydata-la-2019/videos/kyle-polich-keynote-pydata-la-2019.json b/pydata-la-2019/videos/kyle-polich-keynote-pydata-la-2019.json index 0cb6cfb72..0792407f4 100644 --- a/pydata-la-2019/videos/kyle-polich-keynote-pydata-la-2019.json +++ b/pydata-la-2019/videos/kyle-polich-keynote-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "www.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 3599, "language": "eng", - "published_at": "2019-12-23T21:03:40.000Z", "recorded": "2019-12-04", "speakers": [ "Kyle Polich" diff --git a/pydata-la-2019/videos/leland-mcinnes-topological-techniques-for-unsupervised-learning-pydata-la-2019.json b/pydata-la-2019/videos/leland-mcinnes-topological-techniques-for-unsupervised-learning-pydata-la-2019.json index d10669dc6..5a1b6b26d 100644 --- a/pydata-la-2019/videos/leland-mcinnes-topological-techniques-for-unsupervised-learning-pydata-la-2019.json +++ b/pydata-la-2019/videos/leland-mcinnes-topological-techniques-for-unsupervised-learning-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Many topics in unsupervised learning can be viewed as dealing with the\nrelative geometry of data. In mathematics, topology and homotopy theory\nare the fields that deal with similar kinds of questions. Using ideas,\ntechniques, and language from topology can prove fruitful for\nunsupervised learning. This talk will introduce you to the ideas and\nintuitions for this, and provide meaningful examples.\n\nMany topics in unsupervised learning can be viewed as dealing with the\nrelative geometry of data. In mathematics, topology and homotopy theory\nare the fields that deal with similar kinds of questions. Using ideas,\ntechniques, and language from topology can prove fruitful for\nunsupervised learning. This talk will look at how topological approaches\ncan be brought to bear upon unsupervised learning problems as diverse as\ndimension reduction, clustering, anomaly detection, word embedding, and\nmetric learning. Through the lens and language of topology and category\ntheory we can draw common threads through all these topics, pointing the\nway toward new approaches to these problems. By focusing on broad ideas\nand intuitions, and working with example uses, you don't need a\nbackground in topology to understand the approach. I hope to convince\nyou that topological approaches offer a rich and growing field of\nresearch for unsupervised learning.\n", "duration": 2426, "language": "eng", - "published_at": "2019-12-23T21:03:45.000Z", "recorded": "2019-12-04", "speakers": [ "Leland McInnes" diff --git a/pydata-la-2019/videos/lightning-talks-pydata-la-2019.json b/pydata-la-2019/videos/lightning-talks-pydata-la-2019.json index 8b3f49151..54bab425a 100644 --- a/pydata-la-2019/videos/lightning-talks-pydata-la-2019.json +++ b/pydata-la-2019/videos/lightning-talks-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "www.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 2209, "language": "eng", - "published_at": "2019-12-23T21:03:50.000Z", "recorded": "2019-12-04", "speakers": [ "Various speakers" diff --git a/pydata-la-2019/videos/manu-flores-analyzing-genetic-networks-using-neural-networks-pydata-la-2019.json b/pydata-la-2019/videos/manu-flores-analyzing-genetic-networks-using-neural-networks-pydata-la-2019.json index 9d86ca237..e0bcadd9b 100644 --- a/pydata-la-2019/videos/manu-flores-analyzing-genetic-networks-using-neural-networks-pydata-la-2019.json +++ b/pydata-la-2019/videos/manu-flores-analyzing-genetic-networks-using-neural-networks-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "In this tutorial, we'll delve into the depths of biological data\nanalysis. Using publicly available datasets, we'll use machine learning\nto try to solve one of life's biggest mysteries: that of completing the\nwiring diagrams of genetic regulatory networks.\n\n**Genes that fire together wire together**\n\nIn every living cell, there are genetic regulatory networks that dictate\nhow genes are turned on and off. These networks have evolved to help the\ncell fine-tune the number and speed of the biomolecules that make up\nthe cell. Despite studying gene networks for more than 30 years in model\norganisms, the community still faces some problems. The problem we're\ngoing to address in this tutorial is to try to make guesses of the\n\"missing wires\" of these gene networks.\n\nWe'll be using the Keras API to build our neural nets and pandas / numpy\n/ scikit-learn to wrangle through these massive datasets. Using publicly\navailable RNAseq datasets, we'll train a neural network to predict the\nbiological module of some of the missing nodes in the network. We'll\nalso use the NetworkX library to work with the genetic networks.\n", "duration": 4723, "language": "eng", - "published_at": "2019-12-23T21:00:27.000Z", "recorded": "2019-12-03", "speakers": [ "Manu Flores" diff --git a/pydata-la-2019/videos/manu-gopinathan-malte-loller-andersen-reinforcement-learning-pac-man-pydata-la-2019.json b/pydata-la-2019/videos/manu-gopinathan-malte-loller-andersen-reinforcement-learning-pac-man-pydata-la-2019.json index d6206eb9a..119d5d04e 100644 --- a/pydata-la-2019/videos/manu-gopinathan-malte-loller-andersen-reinforcement-learning-pac-man-pydata-la-2019.json +++ b/pydata-la-2019/videos/manu-gopinathan-malte-loller-andersen-reinforcement-learning-pac-man-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "This workshop serves as an introduction to reinforcement learning where\nthe participants will implement a Pac-Man agent. The Pac-Man agent will\nlearn how to solve different maps using Q-learning and Deep Q-learning.\nWe start out by exploring Q-learning, before diving into deep\nQ-learning, which utilizes neural networks. Jupyter notebook and GPUs\nwill be used to aid us in our work.\n\nOver the past few years, reinforcement learning (RL) has achieved\npromising results and it is currently being explored in a wide range of\nfields. In areas such as self-driving cars, gaming and medicine, RL is\nat the frontier of state-of-the-art results. In this workshop, we will\nexplore what the fuss is all about!\n\nThis workshop serves as an introduction to reinforcement learning where\nthe participants will implement a Pac-Man agent. 
The Pac-Man agent will\nlearn how to solve different maps using Q-learning and Deep Q-learning.\nWe start out by exploring Q-learning, a cornerstone in RL. Expanding\nfurther, we continue on to deep Q-learning, which utilizes neural\nnetworks. The code is executed in the cloud on Jupyter notebooks, and\nfor training the neural networks we use GPUs in the cloud. Everything is\nwritten in Python.\n\nNo prior knowledge of reinforcement learning is necessary.\n\nIf reinforcement learning has been a mysterious domain to you, this\nsession will most likely leave you with a greater understanding of the\nprocess and aid you in setting up projects of your own.\n\n| **Optional preparation for the tutorial**\n| The tutorial will be much like a walkthrough, so it is quite fine to\n just follow along without programming yourself. If you do, however,\n want to interact with the actual code, it is recommended that you\n clone the project and set up the Python environment beforehand. While\n most of the tutorial will be in Jupyter Notebook, some setup is also\n required on local machines. Since we only have 1.5 hours, we will not\n have time to help individual participants set up the project\n during the actual tutorial, but if you do face issues you can send us\n an email ahead of the tutorial. We will be happy to help!\n\n| Project repository: https://github.com/knowit/ml-pacman\n| Manu: `[email protected] `__\n| Malte: `[email protected] `__\n", "duration": 5456, "language": "eng", - "published_at": "2019-12-23T21:00:35.000Z", "recorded": "2019-12-03", "speakers": [ "Manu Gopinathan", diff --git a/pydata-la-2019/videos/maria-khalusova-machine-learning-model-evaluation-metrics-pydata-la-2019.json b/pydata-la-2019/videos/maria-khalusova-machine-learning-model-evaluation-metrics-pydata-la-2019.json index 2f0fc9327..4a9571096 100644 --- a/pydata-la-2019/videos/maria-khalusova-machine-learning-model-evaluation-metrics-pydata-la-2019.json +++ b/pydata-la-2019/videos/maria-khalusova-machine-learning-model-evaluation-metrics-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Choosing the right evaluation metric for your machine learning project\nis crucial, as it decides which model you\u2019ll ultimately use. How do you\nchoose an appropriate metric? This talk will explore the important\nevaluation metrics used in regression and classification tasks, their\npros and cons, and how to make a smart decision.\n\nIn this talk, we'll go through evaluation metrics for regression tasks\n(R squared, MAE, MSE, RMSE, and RMSLE) and classification tasks\n(Classification accuracy, Precision, Recall, F1 Score, ROC/AUC,\nPrecision/Recall AUC, Matthews Correlation Coefficient, and ways to\nextend some of these from binary to multiclass problems). 
I'll talk\nabout the differences between them, the trade-offs, and when some may\nbe more helpful than others.\n", "duration": 2388, "language": "eng", - "published_at": "2019-12-23T21:04:21.000Z", "recorded": "2019-12-05", "speakers": [ "Maria Khalusova" diff --git a/pydata-la-2019/videos/matthew-seal-data-and-etl-with-notebooks-in-papermill-pydata-la-2019.json b/pydata-la-2019/videos/matthew-seal-data-and-etl-with-notebooks-in-papermill-pydata-la-2019.json index 49b090e67..d902dd1fd 100644 --- a/pydata-la-2019/videos/matthew-seal-data-and-etl-with-notebooks-in-papermill-pydata-la-2019.json +++ b/pydata-la-2019/videos/matthew-seal-data-and-etl-with-notebooks-in-papermill-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Notebooks have traditionally been a tool for drafting code and avoiding\nrepeated expensive computations while exploring solutions. However, with\nnew tools like nteract's papermill and scrapbook libraries, this\ntechnology has been expanded to make a reusable and parameterizable\ntemplate for execution. We'll look at how to make use of this pattern\nfor Data and ETL processes.\n\nIntro\n~~~~~\n\n- Myself, Netflix, and Why I'm here\n- What does a Data Platform Team do?\n- Projects and Open Source tools discussed in the presentation: Papermill,\n Jupyter, nteract, etc.\n\nNotebooks\n---------\n\nWhat are Jupyter Notebooks?\n~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nWe'll see some visual examples and breakdowns of notebooks.\n\nHow Notebooks Work\n~~~~~~~~~~~~~~~~~~\n\nA guide through how a notebook executes and the model it uses to run\nyour code.\n\nTraditional Use Cases\n~~~~~~~~~~~~~~~~~~~~~\n\nAround experimentation and code development.\n\nNew Use Cases\n~~~~~~~~~~~~~\n\nFor production data and operations without full rewrites of Notebook\ncode.\n\nPapermill\n---------\n\nWhat is papermill?\n~~~~~~~~~~~~~~~~~~\n\n`papermill `__ is a library\nfor executing notebooks programmatically.\n\nHow do you use it?\n~~~~~~~~~~~~~~~~~~\n\nYou'll see some examples in Python and with its provided CLI.\n\nHow does it fit into the Notebook model?\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nWe'll relate the execution back to the original Notebook execution\ndiagrams.\n\nHow to extend papermill\n~~~~~~~~~~~~~~~~~~~~~~~\n\nQuick pointer to the extensibility of the library and how to add new\nfunctionality.\n\nUsing papermill in production data pipelines\n--------------------------------------------\n\nOperationalizing Notebooks\n~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nFailure analysis, Productionalization, Sharing executions...\n\nDAGs of Notebooks\n~~~~~~~~~~~~~~~~~\n\nMaking a pipeline with Notebooks.\n\nIntegration Testing\n~~~~~~~~~~~~~~~~~~~\n\nGood practices; where unit testing doesn't fit\n\n@ Netflix usage\n---------------\n\nQuick blip about adoption and usage at Netflix.\n\nRelated libraries (time pending)\n--------------------------------\n\nScrapbook\n~~~~~~~~~\n\nCommuter / NBViewer\n~~~~~~~~~~~~~~~~~~~\n", "duration": 2338, "language": "eng", - "published_at": "2019-12-23T21:03:10.000Z", "recorded": "2019-12-04", "speakers": [ "Matthew Seal" diff --git a/pydata-la-2019/videos/michelle-brenner-how-to-get-started-with-server-less-on-google-amazon-microsoft-pydata-la-2019.json b/pydata-la-2019/videos/michelle-brenner-how-to-get-started-with-server-less-on-google-amazon-microsoft-pydata-la-2019.json index 155db9b9e..2bf241558 100644 --- a/pydata-la-2019/videos/michelle-brenner-how-to-get-started-with-server-less-on-google-amazon-microsoft-pydata-la-2019.json +++ 
b/pydata-la-2019/videos/michelle-brenner-how-to-get-started-with-server-less-on-google-amazon-microsoft-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Serverless apps are everywhere these days, but how do you get started?\nHow do you decide which provider to go with? I have created the same\nPython app with 3 services, AWS, Google Cloud & Microsoft Azure. I will\ngo over the highlights and lowlights, and how to judge what\u2019s right for\nyour project.\n\nServerless apps are everywhere these days, but how do you get started?\nHow do you decide which provider to go with? I have created the same\nPython-based app with 3 services, AWS, Google Cloud & Microsoft Azure. I\nwill go over the highlights and lowlights, and how to judge what\u2019s right\nfor your project.\n\nMy goal for creating this app was to answer these questions. What\nservices are available for which languages, but especially Python? How\nfast can I get to coding? How do I programmatically define the\ninfrastructure? How do I test everything? How is their CI/CD pipeline?\nHow do I secure the app? How can I monitor what is going on?\n\nDuring my research I ran into a common problem. There are guides on how\nto use the UI to create samples. There is technical documentation that\nwill give you the name of the flag you are looking for. There is rarely\na user story to help you go from nothing to a running application. My\nuser story is that I wanted an endpoint that allows you to add data to\npersistent storage, get, edit and delete that data. Seems\nstraightforward, but there is a steep learning curve that had me tearing\nmy hair out. This talk is not just to give you a better understanding of\nserverless offerings, but to keep you from getting discouraged at the\ninevitable blockers. At the end of this talk you\u2019ll walk out with the\nconfidence to create your first serverless microservice and smash any\nwalls you hit along the way.\n", "duration": 2331, "language": "eng", - "published_at": "2019-12-23T21:03:14.000Z", "recorded": "2019-12-04", "speakers": [ "Michelle Brenner" diff --git a/pydata-la-2019/videos/milana-lewis-keynote-pydata-la-2019.json b/pydata-la-2019/videos/milana-lewis-keynote-pydata-la-2019.json index 0c6f6cdd2..f7a913265 100644 --- a/pydata-la-2019/videos/milana-lewis-keynote-pydata-la-2019.json +++ b/pydata-la-2019/videos/milana-lewis-keynote-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "www.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations.
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 2668, "language": "eng", - "published_at": "2019-12-23T21:03:19.000Z", "recorded": "2019-12-04", "speakers": [ "Milana Lewis" diff --git a/pydata-la-2019/videos/nick-acosta-open-sourcing-natural-disaster-preparedness-and-relief-pydata-la-2019.json b/pydata-la-2019/videos/nick-acosta-open-sourcing-natural-disaster-preparedness-and-relief-pydata-la-2019.json index 6e10887b6..1e1f4007f 100644 --- a/pydata-la-2019/videos/nick-acosta-open-sourcing-natural-disaster-preparedness-and-relief-pydata-la-2019.json +++ b/pydata-la-2019/videos/nick-acosta-open-sourcing-natural-disaster-preparedness-and-relief-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "This talk will provide an overview of the past few years of Call for\nCode and introduce IBM Code and Response, IBM's effort to enable,\ndeploy, and create innovative solutions to reduce the impact of natural\ndisasters through open source.\n\nNatural disasters are among the world's greatest challenges, and have\ndevastating effects both locally, from the many fires that have occurred\nin Southern and Northern California (over 17 million acres of land have\nbeen lost to wildfire in the US in the last 2 years), and globally. IBM,\nwith help from a large partnership group including the United Nations and\nAmerican Red Cross, developed Call for Code, a global contest\nchallenging developers to build technologies to aid those going through\nadversity. This talk will provide an overview of the past few years of\nCall for Code and introduce IBM Code and Response, IBM's effort to\nenable, deploy, and create innovative solutions to reduce the impact of\nnatural disasters through open source. The talk will also highlight some\nnovel ways developers have used Python in order to accomplish these\ngoals.\n", "duration": 1731, "language": "eng", - "published_at": "2019-12-23T21:04:28.000Z", "recorded": "2019-12-05", "speakers": [ "Nick Acosta" diff --git a/pydata-la-2019/videos/paul-anzel-git-ting-along-with-others-pydata-la-2019.json b/pydata-la-2019/videos/paul-anzel-git-ting-along-with-others-pydata-la-2019.json index 58e7cb165..ec85fe81e 100644 --- a/pydata-la-2019/videos/paul-anzel-git-ting-along-with-others-pydata-la-2019.json +++ b/pydata-la-2019/videos/paul-anzel-git-ting-along-with-others-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Have you emailed snippets of code to your colleagues, only to get\nstuck trying to find which email has the right code? In this tutorial, I\nwill demonstrate how to collaborate on code with others using Git. We\nwill cover branching, opening pull requests, doing code reviews,\nresolving code conflicts, and undoing any mistakes you may make along\nthe way. This talk assumes basic Git knowledge.\n\nPrerequisites\n=============\n\nThis tutorial will assume that participants have basic Git knowledge,\nthough anyone who is interested is welcome to attend. If you are\nunfamiliar with Git, I recommend `Software Carpentry's\ntutorial `__.\n\nWe will be doing some Git work together, so participants should have\n`Git installed `__ and should have a personal\n`GitHub account `__. If participants would prefer\nto use a graphical interface, I recommend using\n`SourceTree `__.
Note that to install\nSourceTree, you will need to create a personal BitBucket account.\n\nMaterial\n========\n\nCollaboration\n-------------\n\n- What happens when multiple people try to commit code at once?\n- Using branches and merges\n\nPull requests\n-------------\n\n- Opening issues in GitHub\n- Creating a pull request\n- Doing code reviews\n- Fixing conflicts\n- Merging your code back in\n\nFixing mistakes (as time permits)\n---------------------------------\n\n- Reverts and resets\n- Cherry-picking\n- Whoops, I just committed my password...\n", "duration": 4908, "language": "eng", - "published_at": "2019-12-23T21:02:38.000Z", "recorded": "2019-12-03", "speakers": [ "Paul Anzel" diff --git a/pydata-la-2019/videos/raul-maldonado-ab-testing-in-python-pydata-la-2019.json b/pydata-la-2019/videos/raul-maldonado-ab-testing-in-python-pydata-la-2019.json index 2fd2cd4c7..a97af6fda 100644 --- a/pydata-la-2019/videos/raul-maldonado-ab-testing-in-python-pydata-la-2019.json +++ b/pydata-la-2019/videos/raul-maldonado-ab-testing-in-python-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Python has seen a great emergence in the development of\nstatistical packages, algorithms, and implementations. However, with the\ndevelopment and ease of practicing statistics & algorithms, there are\nstill some rules and constraints one must follow to obtain quality\nsolutions. And that is especially true with AB Testing, a statistical\nprocedure to provide data-driven insights under uncertainty.\n\nThis will be a breakout session on frequentist AB Testing in Python.\n\nWe'll explore the jungle of application and statistical methodology and\npractice with examples of Click Through Rates, the early metrics of\nchoice for AB Testing in production. That being said, compared to the\nlast statistics course you may have taken, there are still\nsome rules and constraints one must follow to obtain quality solutions.\n", "duration": 2369, "language": "eng", - "published_at": "2019-12-23T21:04:06.000Z", "recorded": "2019-12-04", "speakers": [ "Raul Maldonado" diff --git a/pydata-la-2019/videos/ravin-kumar-making-data-relevant-to-business-its-harder-than-you-think-pydata-la-2019.json b/pydata-la-2019/videos/ravin-kumar-making-data-relevant-to-business-its-harder-than-you-think-pydata-la-2019.json index 1bcfdf5b1..6043f3ce6 100644 --- a/pydata-la-2019/videos/ravin-kumar-making-data-relevant-to-business-its-harder-than-you-think-pydata-la-2019.json +++ b/pydata-la-2019/videos/ravin-kumar-making-data-relevant-to-business-its-harder-than-you-think-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Want to use your data skills to empower an organization and influence\nyour company's strategy? Unfortunately it's not as easy as just starting\na Jupyter notebook and importing TensorFlow. Making your analysis change\nhearts and minds takes more than math and code, and is more challenging\nthan just the algorithm. In this talk I'll share tips on how to make\nyour data work resonate across your org.\n\nMaking data relevant to your company isn't just math and code. Business\nskills are required as well. Many data folks, including myself in the\npast, underestimate these \"soft skills\" and end up feeling frustrated\nwhen their analyses don't have the recognition or impact that they expect.\n\nIn this talk I'll focus on the soft skills of data, and connect these\nskills to \"hard data\".
These include topics such as storytelling,\ncoalition building, empathizing with stakeholders, making subjective\ncalls, and negotiation.\n\nIn particular I'll talk about my own failures and my most helpful\nresources outside of the data \"sphere\" which have helped me pick up and\npractice each of these topics. Ultimately by pairing these human skills\nwith your data skills you'll be better able to make your voice heard and\ninfluence the change that you're looking for.\n", "duration": 2740, "language": "eng", - "published_at": "2019-12-23T21:03:01.000Z", "recorded": "2019-12-04", "speakers": [ "Ravin Kumar" diff --git a/pydata-la-2019/videos/richard-liaw-a-guide-to-modern-hyperparameters-turning-algorithms-pydata-la-2019.json b/pydata-la-2019/videos/richard-liaw-a-guide-to-modern-hyperparameters-turning-algorithms-pydata-la-2019.json index 44ed3f864..89ed1d98a 100644 --- a/pydata-la-2019/videos/richard-liaw-a-guide-to-modern-hyperparameters-turning-algorithms-pydata-la-2019.json +++ b/pydata-la-2019/videos/richard-liaw-a-guide-to-modern-hyperparameters-turning-algorithms-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Modern deep learning model performance is very dependent on the choice\nof model hyperparameters, and the tuning process is a major bottleneck\nin the machine learning pipeline. In this talk, we will overview modern\nmethods for hyperparameter tuning and demonstrate how to use Tune, a\nscalable hyperparameter tuning library. Tune is completely open source\nat http://tune.io.\n\nThis talk will target intermediate to advanced data scientists and\nresearchers familiar with deep learning. The talk will first motivate\nthe need for advancements in hyperparameter tuning methods. The talk\nwill then overview standard methods for hyperparameter tuning: grid\nsearch, random search, and Bayesian optimization. Then, we will motivate\nand discuss cutting edge methods for hyperparameter tuning:\nmulti-fidelity Bayesian optimization, successive halving algorithms\n(HyperBand), and population-based training.\n\nThe talk will then present an overview of Tune, a scalable hyperparameter\ntuning system from the UC Berkeley RISELab, and demonstrate how\nusers can leverage cutting edge hyperparameter tuning methods\nimplemented in Tune to quickly improve the performance of standard deep\nlearning models.\n", "duration": 2291, "language": "eng", - "published_at": "2019-12-23T21:03:56.000Z", "recorded": "2019-12-04", "speakers": [ "Richard Liaw" diff --git a/pydata-la-2019/videos/rodolfo-bonnin-a-supremely-light-introduction-to-quantum-computing-pydata-la-2019.json b/pydata-la-2019/videos/rodolfo-bonnin-a-supremely-light-introduction-to-quantum-computing-pydata-la-2019.json index 9a669f482..e1cd52cf1 100644 --- a/pydata-la-2019/videos/rodolfo-bonnin-a-supremely-light-introduction-to-quantum-computing-pydata-la-2019.json +++ b/pydata-la-2019/videos/rodolfo-bonnin-a-supremely-light-introduction-to-quantum-computing-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Quantum Computing is here and now, and its uses for solving real-world\nproblems are getting nearer.
This talk will give you the tools to\nunderstand the main concepts of Quantum Information, and even perform\nyour first steps by running your own programs on real hardware using\nQiskit and Rigetti Forest.\n\nQuantum Computing is one of the most disruptive and fast-paced\ndisciplines of the present time.\n\nFor Data Science practitioners, understanding the main elements of the\nnew paradigm (superposition, measurement, entanglement, qubits, etc.) can\nbe a daunting task. In this talk, we will walk through all these\nconcepts and will use Python to understand and probe them.\n\nThe talk will focus on direct and math-light examples, with accompanying\nPython code examples illustrating the differences between simulating the\nquantum world ideally, and then observing the errors introduced by running\ncode in the noisy environment of real IBM and Rigetti devices.\n\nThe outline of the talk is as follows:\n\n- The need for a new kind of computing\n- Quantum phenomena and how they can be used for computing.\n- Quantum circuits and algorithms: Manipulating quantum data.\n- Simulating and running Python-based examples on real hardware\n (Qiskit, Rigetti Forest SDK, Pennylane)\n- Conclusion\n", "duration": 2532, "language": "eng", - "published_at": "2019-12-23T21:03:24.000Z", "recorded": "2019-12-04", "speakers": [ "Rodolfo Bonnin" diff --git a/pydata-la-2019/videos/sameer-singh-keynote-pydata-la-2019.json b/pydata-la-2019/videos/sameer-singh-keynote-pydata-la-2019.json index 6f93d28e0..8863605ec 100644 --- a/pydata-la-2019/videos/sameer-singh-keynote-pydata-la-2019.json +++ b/pydata-la-2019/videos/sameer-singh-keynote-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "www.pydata.org\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 2854, "language": "eng", - "published_at": "2019-12-23T21:05:22.000Z", "recorded": "2019-12-05", "speakers": [ "Sameer Singh" diff --git a/pydata-la-2019/videos/sujit-pal-building-named-entity-recognition-models-efficiently-using-nerds-pydata-la-2019.json b/pydata-la-2019/videos/sujit-pal-building-named-entity-recognition-models-efficiently-using-nerds-pydata-la-2019.json index 2a955fc59..634f181e4 100644 --- a/pydata-la-2019/videos/sujit-pal-building-named-entity-recognition-models-efficiently-using-nerds-pydata-la-2019.json +++ b/pydata-la-2019/videos/sujit-pal-building-named-entity-recognition-models-efficiently-using-nerds-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Named Entity Recognition (NER) is foundational for many downstream NLP\ntasks. The Open Source NERDS toolkit provides algorithms that can be\nused to quickly build and evaluate NER models from labeled data such as\nIOB. New algorithms can be added with minimal effort.
This presentation\nwill demonstrate how to create and evaluate new NER models using NERDS,\nas well as add new NER algorithms to it.\n\nNamed Entity Recognition (NER) is foundational for many downstream NLP\ntasks such as Information Retrieval, Relation Extraction, Question\nAnswering, and Knowledge Base Construction. While many high-quality\npre-trained NER models exist, they usually cover a small subset of\npopular entities such as people, organizations, and locations. But what\nif we need to recognize domain-specific entities such as proteins,\nchemical names, diseases, etc? The Open Source Named Entity Recognition\nfor Data Scientists (NERDS) toolkit, from the Elsevier Data Science\nteam, was built to address this need.\n\nNERDS aims to speed up development and evaluation of NER models by\nproviding a set of NER algorithms that are callable through the familiar\nscikit-learn style API. The uniform interface allows reuse of code for\ndata ingestion and evaluation, resulting in cleaner and more\nmaintainable NER pipelines. In addition, customizing NERDS by adding new\nand more advanced NER models is also very easy, just a matter of\nimplementing a standard NER Model class.\n\nOur presentation will describe the main features of NERDS, then walk\nthrough a demonstration of developing and evaluating NER models that\nrecognize biomedical entities. We will then describe a Neural\nNetwork-based NER algorithm (a Bi-LSTM seq2seq model written in PyTorch) that we\nwill integrate into the NERDS NER pipeline.\n\nWe believe NERDS addresses a real need for building domain-specific NER\nmodels quickly and efficiently. NER is an active field of research, and\nthe hope is that this presentation will spark interest and contributions\nof new NER algorithms and Data Adapters from the community that can in\nturn help to move the field forward.\n", "duration": 2071, "language": "eng", - "published_at": "2019-12-24T02:48:31.000Z", "recorded": "2019-12-05", "speakers": [ "Sujit Pal" diff --git a/pydata-la-2019/videos/tim-orme-simplicity-for-scale-analyzing-15-million-dna-samples-with-python-pydata-la-2019.json b/pydata-la-2019/videos/tim-orme-simplicity-for-scale-analyzing-15-million-dna-samples-with-python-pydata-la-2019.json index 4c2907c22..4db858595 100644 --- a/pydata-la-2019/videos/tim-orme-simplicity-for-scale-analyzing-15-million-dna-samples-with-python-pydata-la-2019.json +++ b/pydata-la-2019/videos/tim-orme-simplicity-for-scale-analyzing-15-million-dna-samples-with-python-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "When confronted with large-scale data challenges, we often reach for\ncomplex tools to help solve the problem. In the past 7 years, Ancestry\nDNA has amassed the largest collection of consumer genomic data in the\nworld, creating a new scaling challenge in the genomics world. We'll\nshow how picking simple tools in the Python ecosystem helped us solve\nmassive scaling challenges in production.\n\nWith data sets growing larger by the day, and the number of big-data\ntools growing right along with them, it can be daunting to select the\nright tool for the job. Sometimes, too, it's tempting to apply the\nlatest-and-greatest ones to the problems we're working on. But, more\noften than not, the simplest tool is the right one and, fortunately for\nus, that's where Python shines.\n\nIn a short 7 years, Ancestry has collected nearly 15 million DNA\nsamples.
Data of this magnitude has proved to be a massive scaling\nchallenge for the production pipeline that must analyze that data set to\nproduce customer results every single day. This talk will tell the story\nof how that pipeline has evolved over the years, from a manual command\nline process, to a scheduled Hadoop pipeline, and finally into the\nPython-based, event-driven system we use today.\n\nThe talk will give a basic overview of our DNA test and cover our core\nrelative detection and ethnicity algorithms at a high level. I'll then\ndive into the constraints and specific challenges the pipeline presents,\nand how we decided to leverage Python & Celery to solve those problems.\nLastly, I'll describe the benefits of switching to Python, demonstrating\nthe simplicity, performance, and reduction in code it provided.\n", "duration": 3056, "language": "eng", - "published_at": "2019-12-23T21:04:39.000Z", "recorded": "2019-12-05", "speakers": [ "Tim Orme" diff --git a/pydata-la-2019/videos/to-production-and-beyond-managing-the-machine-learning-lifecycle-with-mlflow.json b/pydata-la-2019/videos/to-production-and-beyond-managing-the-machine-learning-lifecycle-with-mlflow.json index a6e493a6b..83ffa1773 100644 --- a/pydata-la-2019/videos/to-production-and-beyond-managing-the-machine-learning-lifecycle-with-mlflow.json +++ b/pydata-la-2019/videos/to-production-and-beyond-managing-the-machine-learning-lifecycle-with-mlflow.json @@ -1,7 +1,6 @@ { "description": "Building a machine learning model that runs locally on a laptop probably\nisn't generating any value; you have to get that model into production.\nThis talk will focus on getting Data Scientists and Data Engineers more\ncomfortable with the Machine Learning Lifecycle, and how the open source\ntool MLflow can help. Let's take our machine learning models to\nproduction and beyond!\n\nIntroduction\n~~~~~~~~~~~~\n\n- What is the machine learning lifecycle?\n- Why should I care about this?\n\nWhat is MLflow?\n~~~~~~~~~~~~~~~\n\n- High-level overview of this open source Python project\n\nWhat is model tracking?\n~~~~~~~~~~~~~~~~~~~~~~~\n\n- Demo how MLflow can easily be used to track and record experiments\n\nHow to build a reproducible project?\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n- Demo how to use MLflow to be able to reproduce model building\n\nHow to create models that can be run anywhere?\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n- Demo building a model with Apache Spark and deploying it on a\n non-Apache Spark cluster.\n", "duration": 1871, - "published_at": "2020-01-02T20:17:31.000Z", "recorded": "2019-12-04", "speakers": [ "Amanda Moran" diff --git a/pydata-la-2019/videos/tom-goldenberg-kedro-mlflow-reproducible-and-versioned-data-pipelines-at-scale-pydata-la-2019.json b/pydata-la-2019/videos/tom-goldenberg-kedro-mlflow-reproducible-and-versioned-data-pipelines-at-scale-pydata-la-2019.json index b1e99a617..b03b9f8d4 100644 --- a/pydata-la-2019/videos/tom-goldenberg-kedro-mlflow-reproducible-and-versioned-data-pipelines-at-scale-pydata-la-2019.json +++ b/pydata-la-2019/videos/tom-goldenberg-kedro-mlflow-reproducible-and-versioned-data-pipelines-at-scale-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Kedro is a development workflow tool open sourced by QuantumBlack, a\nMcKinsey company. Many data science teams have started using the library\nfor their pipelines but are unsure how to integrate with other model\ntracking tools, such as MLflow.
In this tutorial, we will give an\noverview of Kedro and MLflow and demo how to leverage the best of both.\n\nThe goal of this session is to demonstrate how Kedro and MLflow fit\ntogether in a scalable AI architecture. To start, we will give an\noverview of Kedro and an overview of MLflow:\n\n- What are they used for?\n- What functionality do they provide?\n- How do they compare as tools?\n\nNext, we will walk through a demo of a Kedro project that has MLflow\nintegrated into it. Finally, we will go over deployment options.\n\nThere will be time allocated at the end for Q&A.\n", "duration": 2933, "language": "eng", - "published_at": "2019-12-23T21:00:41.000Z", "recorded": "2019-12-03", "speakers": [ "Tom Goldenberg" diff --git a/pydata-la-2019/videos/vasily-ershov-gradient-boosting-for-data-with-both-numerical-and-text-features-pydata-la-2019.json b/pydata-la-2019/videos/vasily-ershov-gradient-boosting-for-data-with-both-numerical-and-text-features-pydata-la-2019.json index 7c5dceb22..4b39f8165 100644 --- a/pydata-la-2019/videos/vasily-ershov-gradient-boosting-for-data-with-both-numerical-and-text-features-pydata-la-2019.json +++ b/pydata-la-2019/videos/vasily-ershov-gradient-boosting-for-data-with-both-numerical-and-text-features-pydata-la-2019.json @@ -2,7 +2,6 @@ "description": "Some problems contain different types of data, including numerical,\ncategorical and text data. CatBoost is the first Gradient Boosting\nlibrary to support text features out of the box. This talk will\nwalk you through the main features of the CatBoost library and explain\nhow it deals with text data.\n\nGradient boosting is a powerful machine-learning technique that achieves\nstate-of-the-art results in a variety of practical tasks. For a number\nof years, it has remained the primary method for learning problems with\nheterogeneous features, noisy data, and complex dependencies: web\nsearch, recommendation systems, weather forecasting, and many others.\n\nSome problems contain different types of data, including numerical,\ncategorical and text data. In this case the best solution is either\nto build new numerical features in place of the text and categories and\npass them to gradient boosting, or to use out-of-the-box solutions for that.\n\nCatBoost (https://catboost.ai/) is the first Gradient Boosting library\nto support text features out of the box.\n\nCatBoost is a popular open-source gradient boosting library with a whole\nset of advantages:\n\n1. CatBoost is able to incorporate categorical features and text\n features in your data with no additional preprocessing.\n\n2. CatBoost has the fastest GPU and multi-GPU training implementations\n of all the openly available gradient boosting libraries.\n\n3. CatBoost predictions are 20-60 times faster than in other open-source\n gradient boosting libraries, which makes it possible to use CatBoost\n for latency-critical tasks.\n\n4.
CatBoost has a variety of tools to analyze your model.\n\nThis talk will walk you through the main features of this library,\nincluding the way it works with text.\n", "duration": 2243, "language": "eng", - "published_at": "2019-12-23T21:04:00.000Z", "recorded": "2019-12-04", "speakers": [ "Anna Veronika Dorogush" diff --git a/pydata-warsaw-2019/videos/adam-witkowski-predicting-flight-compensation-a-case-study-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/adam-witkowski-predicting-flight-compensation-a-case-study-pydata-warsaw-2019.json index 666cf15c3..fdb006433 100644 --- a/pydata-warsaw-2019/videos/adam-witkowski-predicting-flight-compensation-a-case-study-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/adam-witkowski-predicting-flight-compensation-a-case-study-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Under EU law, airplane passengers have the right to be compensated if\ntheir flight is sufficiently delayed. GIVT helps passengers file such\nclaims. Every claim needs to be verified as there are various conditions\nthat can invalidate it: extreme weather, strikes, bird hit, etc. In this\ntalk, I will describe a machine learning system which replaces manual\nverification of claims.\n\nThis talk describes the process of implementing a machine learning model\nin production. I will talk about various problems that we encountered\nand how we solved them.\n\nFirst, I will describe the problem: verification of whether the airline\nshould compensate the passenger for the flight. This depends on many factors,\nsome of which are easy to define (is the flight delayed more than 180\nminutes? is the airline from the EU?) and some are not (is the weather bad\nenough to invalidate the claim? is there a strike?). Of course, with\nperfect data, those questions would be very easy, but in practice we do\nnot have the luxury of working with ideal data. For example, the weather\nreports are not available in real time, and they are only available\nfor the airports, not the whole route of the flight. I will tell you\nwhat data we had and what features we extracted from it.\n\nThen I will briefly describe the algorithms we used and why,\nunsurprisingly, we ended up using GBM. After the model was ready, we ran\nit in parallel with manual verification for several weeks so the\npredictions of the model could be compared to human work. One important\naspect of running the model in production is explaining the model\ndecisions to the verification team (and end users). I will talk about\ntechniques that can be used to 'explain' the model's decision, for\nexample, SHAP.\n\nThe main value of my talk will be practical lessons for solving a real\nbusiness problem with machine learning.\n", "duration": 1796, "language": "eng", - "published_at": "2020-01-03T02:09:49.000Z", "recorded": "2019-12-13", "speakers": [ "Adam Witkowski" diff --git a/pydata-warsaw-2019/videos/amit-beka-the-nlu-orchestra-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/amit-beka-the-nlu-orchestra-pydata-warsaw-2019.json index f40b63c91..0ccfa26eb 100644 --- a/pydata-warsaw-2019/videos/amit-beka-the-nlu-orchestra-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/amit-beka-the-nlu-orchestra-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Building an NLU application like a chatbot seems easy nowadays, but\ngetting it right architecturally is harder than expected. Let's dive\ninto the problems I've encountered and an elegant-yet-simple solution to\nmake it really work.
We'll look at understanding conversation not as a\npipeline, but as an orchestra of many components playing together\ntowards a shared goal.\n\nFor the last 6 years I've been working on a complex NLU system to\nunderstand human discourse, and have seen many approaches and promises\nto solve it \"easily\". This talk will unfold the journey of our\narchitecture from a simple pipeline to more complex solutions,\nhighlighting the difficulties of current approaches to tackle real\nconversations with real users.\n\nLooking at the problem from a different angle, we'll develop an elegant\nstructure of NLU applications as an orchestra of many components.\n\nThe proposed solution naturally handles many problems:\n\n- Changing requirements, like new intents or entities\n- Non-linear dependencies between components\n- Using the full conversation as context\n", "duration": 1691, "language": "eng", - "published_at": "2020-01-02T15:29:34.000Z", "recorded": "2019-12-12", "speakers": [ "Amit Beka" diff --git a/pydata-warsaw-2019/videos/aydin-zielinski-trashasistant-a-kivy-app-which-uses-deep-neural-networks-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/aydin-zielinski-trashasistant-a-kivy-app-which-uses-deep-neural-networks-pydata-warsaw-2019.json index 60c63f122..7db4fe4f2 100644 --- a/pydata-warsaw-2019/videos/aydin-zielinski-trashasistant-a-kivy-app-which-uses-deep-neural-networks-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/aydin-zielinski-trashasistant-a-kivy-app-which-uses-deep-neural-networks-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "We have developed a mobile application using the Kivy framework. It uses\nDeep Neural Networks to help segregate trash. Thanks to this app, you\nwill know which trash bin to use before you throw anything away. The\nonly thing you have to do is take a photo of the trash via the app.\n\nThe municipality in Gdansk became really strict about trash\nsegregation last year. They check the segregation performance of\nresidential buildings and offer lower taxes to people who live in\nbuildings with better segregation performance. Also, they fine\ncompanies which don't follow the segregation instructions. Even though\nthe instructions are well defined, people are still sometimes unsure\nwhich trash bin they should use. For example, an empty milk carton seems\nlike it should go to the bin for paper, but actually it should go to the\nbin for plastic and metals.\n\nWe realized that having a mobile app to guide people for this purpose\nwould be very helpful. We developed deep neural networks (DNNs) using\ntransfer learning. The DNNs were trained using the Keras Python library.
After obtaining a well-performing model, the Kivy app was\ndeveloped.\n\nIn this talk, we would like to discuss transfer learning, the Keras\nPython library, the Kivy framework, obstacles we faced, and future plans\nfor additional application features.\n", "duration": 1717, "language": "eng", - "published_at": "2020-01-02T16:33:51.000Z", "recorded": "2019-12-12", "speakers": [ "Olgun AYDIN", diff --git a/pydata-warsaw-2019/videos/boguszewski-jankowska-in-the-service-of-the-history-ai-in-archivistics-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/boguszewski-jankowska-in-the-service-of-the-history-ai-in-archivistics-pydata-warsaw-2019.json index 6b059eebf..298c458f9 100644 --- a/pydata-warsaw-2019/videos/boguszewski-jankowska-in-the-service-of-the-history-ai-in-archivistics-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/boguszewski-jankowska-in-the-service-of-the-history-ai-in-archivistics-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Trillions of old photos are a valuable source of information about the\npast. Unfortunately most of them are not described sufficiently or at\nall. Imagine Artificial Intelligence as a friend of the archivist of the\n21st century. This is the end of the epoch of unlabeled photos.\n\n1839 is the date generally accepted as the birth year of practical\nphotography. Since then mankind has produced about 10 quadrillion photos,\nincluding 1 quadrillion last year alone. This huge amount of unlabeled and\nundescribed data is a problem if we want to obtain important\ninformation quickly and efficiently. Old photos are extremely valuable,\nbecause they contain a lot of data about the past. However, some\nexpertise and experience are needed to properly describe such images.\nWhat if we encode all this knowledge in neural networks? Can AI\nbecome a friend of the 21st century archivist? Let\u2019s talk about\nautomatic image tagging and face recognition in old photos.\n", "duration": 1772, "language": "eng", - "published_at": "2020-01-03T00:31:59.000Z", "recorded": "2019-12-12", "speakers": [ "Adrian Boguszewski", diff --git a/pydata-warsaw-2019/videos/bujak-rusiecki-how-we-personalized-onetpl-with-multi-armed-bandits-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/bujak-rusiecki-how-we-personalized-onetpl-with-multi-armed-bandits-pydata-warsaw-2019.json index eb09299a6..54256e0ec 100644 --- a/pydata-warsaw-2019/videos/bujak-rusiecki-how-we-personalized-onetpl-with-multi-armed-bandits-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/bujak-rusiecki-how-we-personalized-onetpl-with-multi-armed-bandits-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Imagine you need to choose ten articles out of hundreds in a way that\nmaximizes your profit. It's not as easy as it seems. In this talk, we\nwill explain how we prepare recommendations on the onet.pl home page for\nmillions of users with the use of a multi-armed bandit algorithm.\n\nMulti-armed bandits are a powerful solution for a diversity of\noptimization problems that demand a balance between using existing\nknowledge about item performance and acquiring new knowledge. That's why we\nwould like to focus on the intuition behind the multi-armed bandit\napproach and its application in recommender systems, using the example of\nthe onet.pl home page.
Also, we will introduce E-greedy, UCB and Thompson\nSampling bandits, discuss their pros and cons and show how to tune them\nin a simulated environment.\n", "duration": 1610, "language": "eng", - "published_at": "2020-01-03T08:00:08.000Z", "recorded": "2019-12-13", "speakers": [ "Artur Bujak", diff --git a/pydata-warsaw-2019/videos/chris-sidebottom-tdd-shouldnt-be-tddious-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/chris-sidebottom-tdd-shouldnt-be-tddious-pydata-warsaw-2019.json index 038a5c893..3bd3bac3c 100644 --- a/pydata-warsaw-2019/videos/chris-sidebottom-tdd-shouldnt-be-tddious-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/chris-sidebottom-tdd-shouldnt-be-tddious-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "An introduction to applying TDD in a Data world. Taking the experience\nof traditional TDD from a Web Development background and translating it\ninto useful techniques for Data Scientists. Hopefully by the end of this\ntalk TDD will be far less of a buzzword and you'll enjoy applying it\nmore yourself!\n\nI\u2019ll begin by introducing how I learnt TDD (Test Driven Development) in\nWeb Development, then walk through how the traditional way of using TDD\nsometimes doesn\u2019t apply to Data Science. By exploring examples of\ndata-oriented TDD, I\u2019ll show that even though TDD is a rigorous\npractice, it can also be fun, and how TDD can provide you with more\nspace to explore how to build software.\n\nBuilding upon the data testing, we\u2019ll look at how to apply TDD to\nmachine learning models and why it\u2019s tricky to build deterministic tests\nfor them. Then we\u2019ll bring it all together with pipeline testing: how it\ncan be difficult, and ways to create tests that proxy it. I'll also cover\nwhen not to use TDD, as that can ruin the fun.\n\nBy the end of the talk, you should have ideas for implementing TDD in\nyour workflow with data, and ways to convince people to play along with\nyou!\n", "duration": 1732, "language": "eng", - "published_at": "2020-01-03T08:00:08.000Z", "recorded": "2019-12-12", "speakers": [ "Chris Sidebottom" diff --git a/pydata-warsaw-2019/videos/cyrus-vahid-anyone-can-build-great-deep-learning-applications-deep-numpy-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/cyrus-vahid-anyone-can-build-great-deep-learning-applications-deep-numpy-pydata-warsaw-2019.json index 93e708dfb..02c4a921a 100644 --- a/pydata-warsaw-2019/videos/cyrus-vahid-anyone-can-build-great-deep-learning-applications-deep-numpy-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/cyrus-vahid-anyone-can-build-great-deep-learning-applications-deep-numpy-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "As deep learning becomes prevalent in practical AI adoption, it is\nimportant to lower the barrier to entry for DL adoption for the developer\ncommunity. This can be done by 1) retaining and reusing existing and\npopular libraries as much as possible and employing them in the development\nof DL applications, and 2) automating model development as much as\npossible through the use of open source AutoML tools.\n\nDeep Numpy enhances Numpy by adding GPU support and parallel processing\nto it, while aspiring to remain 100% numpy compatible.
It enables\neasy-to-use and easy-to-extend AutoML with a focus on deep learning, and\nmakes AutoML deployable in real-world applications.\n\nIn this talk, we focus on the use of Deep Numpy and AutoGluon for rapid\ndevelopment of DL models.\n", "duration": 1826, "language": "eng", - "published_at": "2020-01-02T16:36:33.000Z", "recorded": "2019-12-13", "speakers": [ "Cyrus Vahid" diff --git a/pydata-warsaw-2019/videos/dean-langsam-disease-modeling-with-scipy-and-pymc-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/dean-langsam-disease-modeling-with-scipy-and-pymc-pydata-warsaw-2019.json index 8dd1d0462..f3c5ac6c4 100644 --- a/pydata-warsaw-2019/videos/dean-langsam-disease-modeling-with-scipy-and-pymc-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/dean-langsam-disease-modeling-with-scipy-and-pymc-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Programs which aim to eradicate disease must rely on interpretable models.\nThese models quickly become hard to solve, not to mention train\nwhen parameters are missing. Scipy and PyMC come to our rescue for the heavy\nlifting.\n\nIn 2018, Israel saw the biggest outbreak of measles since the\nintroduction of a vaccine in the late 1960s. Nowadays, vaccine policies\nare not only decided by laboratory tests. Those tests are complemented\nby a plethora of computational epidemiology simulations predicting the\neffects of various vaccination policies on the entire population. A\npopulation-level policy to eradicate disease must rely on interpretable\nmodels. These models quickly become hard to solve, not to mention train\nwhen parameters are missing. Using Scipy as a solver and PyMC for Bayesian\ninference, we are able to learn parameter distributions for missing\nnatural parameters, such as the disease's \"strength\" or\n\"infectiousness\". We can then use the underlying distributions for these\nparameters in order to simulate possible outcomes for future policies.\n", "duration": 1813, "language": "eng", - "published_at": "2020-01-02T15:28:21.000Z", "recorded": "2019-12-12", "speakers": [ "Dean Langsam" diff --git a/pydata-warsaw-2019/videos/dr-clement-walter-keras-fsl-fast-model-builder-for-production-ready-few-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/dr-clement-walter-keras-fsl-fast-model-builder-for-production-ready-few-pydata-warsaw-2019.json index 20444640e..937f3f33a 100644 --- a/pydata-warsaw-2019/videos/dr-clement-walter-keras-fsl-fast-model-builder-for-production-ready-few-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/dr-clement-walter-keras-fsl-fast-model-builder-for-production-ready-few-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Few-shot learning aims at leveraging huge databases to train deep\nneural network models to be used on problems with very little data. Among\nother methods we will focus on metric learning algorithms because they\nallow for immediate adaptation of the model in production. To develop\nsuch models, fast experimentation is key; we will present a versatile\nframework for their implementation in tf.keras.\n\nMost of the industrial cases we face do not have enough data to allow\nfor a complete end-to-end training of common deep architectures.\nFurthermore, research papers often do not address real test cases. In this\ncontext there is a need for easy benchmarking of usual and custom models\non one's particular datasets.\n\nFurthermore, the best academic performers may not be the preferred choice\nfor production applications, as simplicity, robustness and explainability\nare other factors of interest.
Thus the need for modularity in the\nimplementation, to be able to mix the best of them and improve practical\nresults.\n\nWe will review some recent theoretical developments in few-shot learning\nand show their corresponding implementations in tf.keras. Finally, I will\nshowcase the keras\_fsl package with public notebooks and key results on\nusual benchmarks.\n", "duration": 1440, "language": "eng", - "published_at": "2020-01-03T00:31:00.000Z", "recorded": "2019-12-12", "speakers": [ "Dr. Cl\u00e9ment Walter" diff --git a/pydata-warsaw-2019/videos/filip-geppert-how-to-effectively-extract-image-features-lets-play-with-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/filip-geppert-how-to-effectively-extract-image-features-lets-play-with-pydata-warsaw-2019.json index 6e977d613..79bc164c8 100644 --- a/pydata-warsaw-2019/videos/filip-geppert-how-to-effectively-extract-image-features-lets-play-with-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/filip-geppert-how-to-effectively-extract-image-features-lets-play-with-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "This talk is an introduction to the OpenCV 4+ pythonic API. We are going\nto go through the most popular methods used in image processing and\ncomputer vision apps. We will also explore new methods that have been\nadded in the newest release of the package. No prior OpenCV knowledge is\nrequired to participate.\n\n**Intro & added value**\n\nImage processing and computer vision are gaining huge interest nowadays.\nModern machine learning models very often take advantage of features\ncalculated on images. The talk is an introduction to the most popular\npythonic image processing package \u2192 OpenCV.\n\nYou will learn the most important concepts in computer vision, and see\nhow these are applied to images. After this talk you will be ready to\nstart your first computer vision project!\n\n**Topic a.k.a. come if you:**\n\nThe talk is addressed to Data Scientists, Python Developers and Data\nEngineers that would like to see what OpenCV package offers and if it\u2019s\nthe right tool to learn. I will provide a lot of examples explaining how\nimage/video processing and feature extraction can be done in OpenCV.\n\n**The type of talk**\n\nAll code examples will be shared afterwards through GitHub repository.\nDuring the talk, the presentation will be done either with the help of\nJupyter Notebook or with interactive slides (to simulate the programming\nprocess).\n\n**What you will learn**\n\n1. Intro to key image processing methods, such as:\n\n - thresholding techniques\n - colorspace conversion\n - contour detection\n - image filtering\n - morphological transformations\n - arithmetic with images\n - color histograms\n - shape detection\n - template matching\n - new methods that have been added since the release of version 4.0.\n\n2. How these methods are applied with the use of the OpenCV 4+ API.\n\n3.
Tips & tricks to speed up image processing and feature extraction\n development time.\n", "duration": 1294, "language": "eng", - "published_at": "2020-01-03T02:09:49.000Z", "recorded": "2019-12-13", "speakers": [ "Filip Geppert" diff --git a/pydata-warsaw-2019/videos/jacek-komorowski-football-video-analysis-using-deep-learning-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/jacek-komorowski-football-video-analysis-using-deep-learning-pydata-warsaw-2019.json index 88a132899..39f13aecc 100644 --- a/pydata-warsaw-2019/videos/jacek-komorowski-football-video-analysis-using-deep-learning-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/jacek-komorowski-football-video-analysis-using-deep-learning-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "The talk will present how to combine classical computer vision\ntechniques with deep learning methods to automate analysis of football\nvideos. It'll cover efficient methods for ball and player detection and\nrecognition in long shot video coverage of football games.\n\nI'll present how to combine classical computer vision techniques with\ndeep learning methods to build a solution to automate football video\nanalysis. The talk will cover using deep convolutional neural networks\nfor ball and player detection, application of multi-view stereo methods\nfor 3D ball position recovery and using Spatial Transformer Networks to\nboost the accuracy of the jersey number recognition classifier. I'll discuss\npractical problems arising during long shot video analysis of football\ngames. Problems that make apparently simple tasks, like ball detection,\nreally challenging.\n", "duration": 2116, "language": "eng", - "published_at": "2020-01-02T18:11:20.000Z", "recorded": "2019-12-13", "speakers": [ "Jacek Komorowski" diff --git a/pydata-warsaw-2019/videos/jakub-kubajek-unsupervised-learning-for-news-summarisation-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/jakub-kubajek-unsupervised-learning-for-news-summarisation-pydata-warsaw-2019.json index c1576304c..34db6963c 100644 --- a/pydata-warsaw-2019/videos/jakub-kubajek-unsupervised-learning-for-news-summarisation-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/jakub-kubajek-unsupervised-learning-for-news-summarisation-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "With hundreds of articles published every day, it is hard to keep track\nof the main issues in the media. However, unsupervised learning may\nhelp to identify the most relevant information.\n\nWhy should we read news sites if we can automatically extract the most\nimportant information?\n\nDuring the talk, I will present how to identify and summarise the most\nimportant topics appearing on news sites.
I will cover theoretical and\npractical aspects of LexRank and other unsupervised methods that may be\nused to identify key topics in media on a particular day.\n", "duration": 1559, "language": "eng", - "published_at": "2020-01-03T08:00:08.000Z", "recorded": "2019-12-12", "speakers": [ "Jakub Kubajek" diff --git a/pydata-warsaw-2019/videos/jakub-nowacki-how-to-manage-data-related-projects-and-not-fail-too-often-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/jakub-nowacki-how-to-manage-data-related-projects-and-not-fail-too-often-pydata-warsaw-2019.json index 548da8060..ecec29dc6 100644 --- a/pydata-warsaw-2019/videos/jakub-nowacki-how-to-manage-data-related-projects-and-not-fail-too-often-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/jakub-nowacki-how-to-manage-data-related-projects-and-not-fail-too-often-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Up to 85% of data-related projects fail. Most of that is due to poor\nmanagement of these projects, and no one-size-fits-all solution (not\neven agile) can solve the problem. I want to share what I have learned so\nfar, from experience but also from good books and articles, to start and\npromote a wider discussion on data-related project and people\nmanagement, and how we can help ourselves.\n\nDepending on estimates, between 60% and 85% of Big Data projects fail; we\nsee similar numbers for AI/ML projects. Why is that? Have you ever been\non a good-looking project that didn't deliver or was abruptly closed? A\nlot of that has to do with management, and if you think building products\nbased on software development was hard, data-related projects are even harder. In\nthis talk I will combine my experience of being a part of or running\nteams in a few data-related projects, spanning from big data engineering\nto machine learning engineering in organizations of different sizes. I\nwill tell you about my successes and failures, how I feel\ndata-related projects are perceived by the business and what we can do\nabout this. The ideas and experience are backed by a number of books,\narticles and methods circulating around in the community. You will learn\nnot only what you can do as a manager, but also as a team member. Also,\nthat Agile is good but not a one-size-fits-all solution, and when it\nis better to pass. By no means do I feel like a know-it-all in\nmanagement; I just want to share what I have learned so far, and start and\npromote a wider discussion on data-related project and people\nmanagement, and how we can help ourselves.\n", "duration": 1650, "language": "eng", - "published_at": "2020-01-02T16:36:52.000Z", "recorded": "2019-12-13", "speakers": [ "Jakub Nowacki" diff --git a/pydata-warsaw-2019/videos/jarek-potiuk-whats-coming-in-apache-airflow-20-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/jarek-potiuk-whats-coming-in-apache-airflow-20-pydata-warsaw-2019.json index 46ae31a2e..3d7da549c 100644 --- a/pydata-warsaw-2019/videos/jarek-potiuk-whats-coming-in-apache-airflow-20-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/jarek-potiuk-whats-coming-in-apache-airflow-20-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Apache Airflow is one of the most popular Data processing orchestration\nengines. After a long line of 1.10.x releases, in December we will be\nquickly approaching the 2.0 release. The release is not backwards compatible\nand it will contain many improvements and changes.
This talk will outline\nthe most important changes coming in Apache Airflow 2.0.\n\nJarek - as an Apache Airflow PMC member and one of the more active\nmembers of the Apache Airflow community - will talk about what is coming\nin Apache Airflow 2.0. Apache Airflow 2.0 is not backwards compatible,\nand the community has put a lot of effort into cleaning up, refactoring\nand improving the code and building functionalities that are going to\nmake life easier for the users of this - one of the most popular -\norchestration engines for Data and Machine Learning processing jobs. Some\nof the most long-standing requests from the community, such as DAG\nserialisation to the database and a stateless webserver, are only\nscratching the surface of what's coming in Airflow 2.0.\n\nThis talk is targeted mainly at Apache Airflow users who would\nlike to learn what they can do with the upcoming Airflow 2.0, as well as\nhow to migrate to Airflow 2.0 painlessly. It will also be a unique\nopportunity to provide feedback on early versions of Airflow 2.0 that\nwill be available by then and discuss your proposals and questions with\nan Apache Airflow Committer, so there will be plenty of time for\nquestions - during and after the talk.\n", "duration": 1922, "language": "eng", - "published_at": "2020-01-03T00:32:14.000Z", "recorded": "2019-12-12", "speakers": [ "Jarek Potiuk" diff --git a/pydata-warsaw-2019/videos/joanna-piwko-sentiment-analysis-of-tweets-in-polish-language-using-deep-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/joanna-piwko-sentiment-analysis-of-tweets-in-polish-language-using-deep-pydata-warsaw-2019.json index bf2fe9d54..4e7e0b17b 100644 --- a/pydata-warsaw-2019/videos/joanna-piwko-sentiment-analysis-of-tweets-in-polish-language-using-deep-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/joanna-piwko-sentiment-analysis-of-tweets-in-polish-language-using-deep-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Sentiment analysis in texts is a problem that can be solved using\nArtificial Intelligence. The talk's goal is to present how to\ndetect five emotions (happiness, anger, sadness, fear, disgust) in\ntweets in the Polish language using open source tools. During the\npresentation, the process from data collecting to creating a deep neural\nnetwork will be shown.\n\nSentiment analysis is one of the problems which can be solved using\nArtificial Intelligence. Most research related to the analysis of\nemotions in Polish texts from social media (especially from Twitter)\nfocuses only on classification as positive, negative or neutral. In my\npitch, I would like to concentrate on detecting 5 emotions like\nhappiness, sadness, anger, disgust and fear.\n\nDuring the presentation I will show what the whole process, from data\ncollection, preprocessing and labelling to model training and presenting\nresults, looks like. The implementation was done using Python.\n\nFirst, text data from Twitter was cleaned of links, emojis and unknown\nsymbols, which are unnecessary for the analysis. For input to the model,\ntexts were converted to a numeric vector representation. This\nrepresentation was generated using a pretrained Word2vec model for the\nPolish language.\n\nFor labelling data, words were transformed to lemmas (dictionary form)\nusing the Morfeusz 2 package, which is an inflectional analyser. This\noperation was necessary to generate vectors of the numeric\nrepresentation of emotions.
For every word, a vector with the numeric\nrepresentation of 5 emotions was created using the Nencki Affective Word\nList. This list contains numeric information about the emotions of words\nin lemma form.\n\nThe model used for the sentiment analysis was an LSTM network. The first\nlayer of the model was the embedding layer, which takes weights for every\nword from the Word2vec model. It was followed by LSTM and dense layers.\n\nAt the end of the presentation, I will show the results with examples of\ngood classification and misclassification.\n", "duration": 1594, "language": "eng", - "published_at": "2020-01-03T08:00:08.000Z", "recorded": "2019-12-13", "speakers": [ "Joanna Piwko" diff --git a/pydata-warsaw-2019/videos/kasimov-petrova-machine-learning-on-big-data-in-security-applications-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/kasimov-petrova-machine-learning-on-big-data-in-security-applications-pydata-warsaw-2019.json index 9289c6f71..406e4ddb6 100644 --- a/pydata-warsaw-2019/videos/kasimov-petrova-machine-learning-on-big-data-in-security-applications-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/kasimov-petrova-machine-learning-on-big-data-in-security-applications-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "In this talk, we describe how Python in combination with Apache Spark\nhelps Avast to fight bad guys. We demonstrate different use cases of how we\napply machine learning to a wide range of security applications, from\nanomaly detection on time series to clustering of malicious files.\n\nAvast is dedicated to creating a world that provides safety and privacy\nfor all. Every month we stop over 1.5 billion attacks and analyze 30\nmillion new executable files. Robust big data pipelines are crucial for\nus to ensure the safety of our customers. We use Apache Spark and\nmachine learning frameworks, including TensorFlow, in different areas\nsuch as network security and malware detection and classification.\n\nIn the first part of the presentation, we describe our cluster\nenvironment and talk about how we analyze, cluster, and build\nclassification models for malicious files. Clustering by itself is\nwidely used for different security applications, and Spark gives us\na fast way of conducting our experiments. The pipeline is useful\nfor research on new algorithms and the evaluation of production\nones.\n\nIn the second part, we show the application of anomaly detection on time\nseries. As an antivirus company, we receive thousands of different\nincident reports daily. We help malware experts to analyze threats by\nnotifying them about sudden changes.
We will walk you through our\nstreaming application with parallel training and serving of multiple\nTensorFlow models.\n", "duration": 1606, "language": "eng", - "published_at": "2020-01-03T02:09:49.000Z", "recorded": "2019-12-13", "speakers": [ "Yury Kasimov", diff --git a/pydata-warsaw-2019/videos/keynote-inga-strumke-machine-learning-cant-do-the-thinking-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/keynote-inga-strumke-machine-learning-cant-do-the-thinking-pydata-warsaw-2019.json index 7aae7181a..27b4bac8c 100644 --- a/pydata-warsaw-2019/videos/keynote-inga-strumke-machine-learning-cant-do-the-thinking-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/keynote-inga-strumke-machine-learning-cant-do-the-thinking-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Inga is going to share her most serious as well as most recent thoughts\non AI in today\u2019s world and the future, namely why we (unfortunately)\nshould not worry about AGI, but definitely should think hard about data\nand AI ethics.\n\nInga is going to share her most serious as well as most recent thoughts\non AI in today\u2019s world and the future, including why we will probably\nnot have the privilege to worry about AGI in the near future, how to\nachieve world domination using data like the Medici family used numbers,\nwhy we should be inspired by containers in the shipping industry, and\nhow to code our way to tomorrow\u2019s ethics - assuming that the world is a\nclosed system. This is a talk to be inspired by and to discuss!\n", "duration": 2443, "language": "eng", - "published_at": "2020-01-03T02:09:49.000Z", "recorded": "2019-12-13", "speakers": [ "Inga Strumke" diff --git a/pydata-warsaw-2019/videos/keynote-malte-pietsch-transfer-learning-entering-a-new-era-in-nlp-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/keynote-malte-pietsch-transfer-learning-entering-a-new-era-in-nlp-pydata-warsaw-2019.json index dfc799d13..d9b98be73 100644 --- a/pydata-warsaw-2019/videos/keynote-malte-pietsch-transfer-learning-entering-a-new-era-in-nlp-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/keynote-malte-pietsch-transfer-learning-entering-a-new-era-in-nlp-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Transfer learning has been changing the NLP landscape tremendously since\nthe release of BERT one year ago. Transformers of all kinds have\nemerged; they dominate most research leaderboards and have made their way\ninto industrial applications. In this talk we will dissect the paradigm\nof transfer learning and its effects on pipelines, modelling and the\nengineer's mindset.\n\nSufficient training data is often a bottleneck for real-world machine\nlearning applications. The computer vision community mitigated this\nproblem by pretraining models on ImageNet and transferring knowledge to\nthe desired task. Thanks to an emerging new class of deep language\nmodels, transfer learning has also become the new standard in NLP. In\nthis talk we will share strategies, tips & tricks along all model\nphases: Pretraining a language model from scratch, adjusting it for\ndomain specific language and fine-tuning it for the desired downstream\ntask.
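The pretrain/adapt/fine-tune recipe in the keynote abstract above maps onto, for example, the Hugging Face transformers API - one possible stack, not necessarily the speaker's own: load a pretrained checkpoint and attach a fresh task head to fine-tune.

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# A fresh classification head on top of pretrained BERT weights,
# ready to be fine-tuned on the downstream task's labels.
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased", num_labels=2
)

inputs = tokenizer("Transfer learning is entering a new era.", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print(logits.shape)  # torch.Size([1, 2])
```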
We will demonstrate the practical implications by showing how\nmodels like BERT caused major breakthroughs for the task of Question\nAnswering.\n", "duration": 2729, "language": "eng", - "published_at": "2020-01-03T02:09:49.000Z", "recorded": "2019-12-13", "speakers": [ "Malte Pietsch" diff --git a/pydata-warsaw-2019/videos/keynote-romeo-kienzler-trusted-ai-building-reproducible-unbiased-and-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/keynote-romeo-kienzler-trusted-ai-building-reproducible-unbiased-and-pydata-warsaw-2019.json index 02ea2f276..6273c1d61 100644 --- a/pydata-warsaw-2019/videos/keynote-romeo-kienzler-trusted-ai-building-reproducible-unbiased-and-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/keynote-romeo-kienzler-trusted-ai-building-reproducible-unbiased-and-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Untrusted AI doesn\u2019t make it into production. The concerns are just too\nhigh. In this talk we\u2019ll show how data lineage, bias detection,\nadversarial robustness and model explainability can be achieved using an\nopen source stack.\n\nUntrusted AI doesn\u2019t make it into production. The concerns are just too\nhigh. In this talk we\u2019ll show how data lineage, bias detection,\nadversarial robustness and model explainability can be achieved using an\nopen source stack.\n", "duration": 1707, "language": "eng", - "published_at": "2020-01-03T00:30:28.000Z", "recorded": "2019-12-12", "speakers": [ "Romeo Kienzler" diff --git a/pydata-warsaw-2019/videos/keynote-vince-madai-the-ethics-of-artificial-intelligence-what-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/keynote-vince-madai-the-ethics-of-artificial-intelligence-what-pydata-warsaw-2019.json index 5ecb9fe8e..de78d26e7 100644 --- a/pydata-warsaw-2019/videos/keynote-vince-madai-the-ethics-of-artificial-intelligence-what-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/keynote-vince-madai-the-ethics-of-artificial-intelligence-what-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "In my talk I will explore the ethics of artificial intelligence. I will\nintroduce morality and ethics and will give examples in which areas\nartificial intelligence - or, better phrased, machine learning - is\nleading and will lead to ethical challenges. I will focus in my talk\nespecially on the developer's perspective: What special ethical\nchallenges will developers be facing?\n\nIn my talk I will explore the ethics of artificial intelligence. I will\nintroduce morality and ethics and will give examples in which areas\nartificial intelligence - or, better phrased, machine learning - is\nleading and will lead to ethical challenges. I will focus in my talk\nespecially on the developer's perspective: What special ethical\nchallenges will developers be facing and how can they prepare?
How can\nstudents and developers be trained to spot ethical challenges and how\ncan they solve them?\n", "duration": 2318, "language": "eng", - "published_at": "2020-01-03T00:30:08.000Z", "recorded": "2019-12-12", "speakers": [ "Vince Madai" diff --git a/pydata-warsaw-2019/videos/marcin-kowiel-how-to-numerically-represent-semi-structured-log-data-for-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/marcin-kowiel-how-to-numerically-represent-semi-structured-log-data-for-pydata-warsaw-2019.json index 1eefec973..e62dfd8b3 100644 --- a/pydata-warsaw-2019/videos/marcin-kowiel-how-to-numerically-represent-semi-structured-log-data-for-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/marcin-kowiel-how-to-numerically-represent-semi-structured-log-data-for-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Representation of text data from semi-structured log records is a\nchallenging problem that is crucial for the quality of anomaly detection\nengines. In the presentation, I will show a pipeline to create vector\nembeddings and normalization rules on semi-structured text data that\ncould be used in anomaly detection problems.\n\nSemi-structured data such as server logs or system activity metadata is\nkey to detecting cybersecurity threats or security breaches. At F-Secure,\nwe apply a variety of machine learning methods to detect anomalies in\nthe stream of semi-structured text-based events to protect our\ncustomers. However, many advanced techniques require a numerical\nrepresentation of text data (file paths, program names, command line\narguments, registry records). The most popular methods (one-hot-encoding\nand simple embeddings) do not capture the specific context and semantics\nof log data. Typically, when processing the log data, the vocabulary is\nmuch bigger than in natural languages. Moreover, we need to identify and\nnormalize randomly generated paths, temporary files, software versions\nor command-line arguments.\n\nI will present a pipeline to create vector embeddings and normalization\nrules on semi-structured data using the popular natural language\nprocessing (NLP) Word2Vec model. At the end I will show a simple anomaly\ndetection engine that uses the embeddings to find potentially malicious\nactivity. If you are interested in cybersecurity, NLP or log processing\nyou should find it appealing.\n", "duration": 1463, "language": "eng", - "published_at": "2020-01-03T00:31:14.000Z", "recorded": "2019-12-12", "speakers": [ "Marcin Kowiel" diff --git a/pydata-warsaw-2019/videos/marcin-tuszynski-visual-search-allegropl-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/marcin-tuszynski-visual-search-allegropl-pydata-warsaw-2019.json index 65eac1feb..3f268bc5d 100644 --- a/pydata-warsaw-2019/videos/marcin-tuszynski-visual-search-allegropl-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/marcin-tuszynski-visual-search-allegropl-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "During this talk, I will share my experience gathered during the\ndevelopment of Allegro's visual search engine. I will present our entire\njourney from the plain idea through different modelling approaches up to\nits current solution. Finally, I will provide some tips and tricks that\nwe have learned along the way and show a lot of images - after all,\nthat's what you're looking for!\n\nHow can one effectively find a recently seen t-shirt on an e-commerce\nplatform such as Allegro? Or maybe an unusual set of cups like the ones used\nby the coffee shop downstairs?
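A rough gensim sketch of the kind of pipeline the log-embedding abstract above describes - tokenise command lines, collapse obviously random fragments, then train Word2vec; the tokeniser, normalisation rule and tiny corpus are all invented for illustration.

```python
import re

from gensim.models import Word2Vec

log_lines = [
    r"C:\Windows\System32\cmd.exe /c whoami",
    r"C:\Users\alice\AppData\Local\Temp\tmp1a2b.exe --silent",
    r"C:\Windows\System32\cmd.exe /c ipconfig",
]

def tokenize(line):
    # Split on path separators and whitespace, then normalise random temp names.
    tokens = re.split(r"[\\/\s]+", line.lower())
    return [re.sub(r"^tmp[0-9a-f]+", "tmp<RAND>", t) for t in tokens if t]

corpus = [tokenize(line) for line in log_lines]
model = Word2Vec(corpus, vector_size=64, window=5, min_count=1, epochs=50)
print(model.wv.most_similar("cmd.exe", topn=3))
```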
One could try to describe them, but a\npicture is worth a thousand words - what if that one picture or actually\na photo was enough? With over 100 million offers and with multiple\nphotos for every product, this seems nearly impossible. Especially given\nhow similar some products are. During this talk, I will share how we\nmake it possible by introducing a visual search model. I will focus on\nour machine learning model and its evolution over time but also cover\nsome technical aspects of our approach.\n", "duration": 1675, "language": "eng", - "published_at": "2020-01-02T16:37:14.000Z", "recorded": "2019-12-13", "speakers": [ "Marcin Tuszy\u0144ski" diff --git a/pydata-warsaw-2019/videos/marina-volkova-machine-learning-spacecraft-designing-for-cybersecurity-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/marina-volkova-machine-learning-spacecraft-designing-for-cybersecurity-pydata-warsaw-2019.json index 35984bd32..de0ded47b 100644 --- a/pydata-warsaw-2019/videos/marina-volkova-machine-learning-spacecraft-designing-for-cybersecurity-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/marina-volkova-machine-learning-spacecraft-designing-for-cybersecurity-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Making the world a safer place is not rocket science, it's data science.\n\nIn the field of cybersecurity, data scientists use computational methods\nto develop more effective ways to find security threats hidden in data\nflows of IT network communications. This talk, focused on data\nscientists, will provide a moderately technical representation of how\nmachine learning and data science solve real-life problems,\ndemonstrating how threat detection is enriched and improved through\nmachine learning and data science.\n\nI will address:\n\n- The ups and downs of AI and ML in this application: What\u2019s so\n complicated about cybersecurity anyway?\n- Machine Learning taxonomy:\n classification, clustering, anomaly detection\n- When is an anomaly an anomaly, and when is it not an anomaly?\n- How we built Machine Learning spaceships to identify one of the\n oldest and most persistent attack vectors (i.e. SQL Injections): Big,\n Bigger, and the Biggest versions.\n- Trade-off with accuracy and complexity: Do we need to destroy a\n planet to get rid of SQL injections?\n", "duration": 1507, "language": "eng", - "published_at": "2020-01-03T08:00:08.000Z", "recorded": "2019-12-13", "speakers": [ "Marina Volkova" diff --git a/pydata-warsaw-2019/videos/martyna-urbanek-trzeciak-ml-model-from-an-idea-to-production-with-the-help-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/martyna-urbanek-trzeciak-ml-model-from-an-idea-to-production-with-the-help-pydata-warsaw-2019.json index fb389168f..7c23cce45 100644 --- a/pydata-warsaw-2019/videos/martyna-urbanek-trzeciak-ml-model-from-an-idea-to-production-with-the-help-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/martyna-urbanek-trzeciak-ml-model-from-an-idea-to-production-with-the-help-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "We will talk about how Python can be helpful in machine learning\nprojects at every step of the process, starting with the idea where\ndata-driven predictions can be helpful, through data preparation, data\nmodelling, communication of the results using various visualizations and\nimplementation in production, including monitoring of model performance.\n\nWe will talk about how Python can be helpful in machine learning\nprojects.
We will consider every step of the process starting with the\nidea where data-driven predictions can be helpful, through data\npreparation, data modelling, communication of the results using various\nvisualizations and implementation in production, including monitoring of\nmodel performance. I will mention various Python libraries that can be\nof use at each step and consider real use cases to dive deeper into some\nof the mentioned steps. During the talk I will also mention an open source\nPython library for easy data access that we develop at Fandom.\n", "duration": 1516, "language": "eng", - "published_at": "2020-01-03T08:00:08.000Z", "recorded": "2019-12-12", "speakers": [ "Martyna Urbanek-Trzeciak" diff --git a/pydata-warsaw-2019/videos/mateusz-opala-reproducible-machine-learning-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/mateusz-opala-reproducible-machine-learning-pydata-warsaw-2019.json index d7ab2265c..4a6830ee4 100644 --- a/pydata-warsaw-2019/videos/mateusz-opala-reproducible-machine-learning-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/mateusz-opala-reproducible-machine-learning-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Reproducibility is a cornerstone of scientific methods. Especially in\nproduction Machine Learning it's crucial to ensure that a hidden source of\nrandomness is not the real reason for a model performance improvement. In\nmy talk I will elaborate on the importance of reproducibility and show how\nwe build reproducible machine learning pipelines at Netguru.\n\nReproducibility is a cornerstone of scientific methods. Especially in\nproduction Machine Learning it's crucial to ensure that a hidden source of\nrandomness is not the real reason for a model performance improvement.\nAlthough reproducibility in machine learning papers seems to\nbe a must-have, it's still not a standard.\n\nOutline of the talk:\n\n1. Definitions:\n\n - reproducibility\n - replicability\n - generalisability\n\n2. Motivation for achieving reproducibility\n3. Full reproducibility == Continuous Delivery for ML\n4. Changes in ML development process\n\n - code\n - data\n - models\n\n5. How do we manage change in the ML development process?\n6. Data versioning\n\n - Quilt Data\n\n7. Experiments management\n\n - MLFlow / Polyaxon\n\n8. Summary\n", "duration": 1771, "language": "eng", - "published_at": "2020-01-02T18:11:20.000Z", "recorded": "2019-12-12", "speakers": [ "Mateusz Opala" diff --git a/pydata-warsaw-2019/videos/mia-polovina-analysing-russian-troll-tweets-data-with-python-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/mia-polovina-analysing-russian-troll-tweets-data-with-python-pydata-warsaw-2019.json index d8ddde9eb..b2274fbcf 100644 --- a/pydata-warsaw-2019/videos/mia-polovina-analysing-russian-troll-tweets-data-with-python-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/mia-polovina-analysing-russian-troll-tweets-data-with-python-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "This talk focuses on the insights gathered from analysis of Russian\nTroll Tweets, a dataset created by researchers at Clemson University and\nreleased on Github by FiveThirtyEight.\n\nSocial media sites are increasingly used for propagation of\nmisinformation. Recent efforts include sophisticated campaigns run on\nFacebook and Twitter which aimed to interfere in the 2016 US elections\nand politics. This talk will focus on a campaign that saw trolls engaged\nin such efforts on Twitter.
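As one concrete illustration of the "hidden source of randomness" the reproducibility abstract above warns about, a minimal sketch of pinning the usual RNG seeds; this is only one small piece of the story, since the talk's own outline covers data versioning and experiment tracking with Quilt, MLFlow and Polyaxon.

```python
import os
import random

import numpy as np
import tensorflow as tf

SEED = 42

os.environ["PYTHONHASHSEED"] = str(SEED)  # hash randomisation; set before anything hashes
random.seed(SEED)                         # Python's built-in RNG
np.random.seed(SEED)                      # NumPy
tf.random.set_seed(SEED)                  # TensorFlow op-level seed
```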
Insights obtained with Exploratory Data\nAnalysis (EDA) and text analysis of the Russian Troll Tweets dataset will be\npresented. The talk will also highlight the importance of combating\nmisinformation and computational propaganda.\n", "duration": 2257, "language": "eng", - "published_at": "2020-01-02T16:36:08.000Z", "recorded": "2019-12-12", "speakers": [ "Mia Polovina" diff --git a/pydata-warsaw-2019/videos/michal-jamroz-posterior-collapse-in-deep-generative-models-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/michal-jamroz-posterior-collapse-in-deep-generative-models-pydata-warsaw-2019.json index 9748b9756..8db911e14 100644 --- a/pydata-warsaw-2019/videos/michal-jamroz-posterior-collapse-in-deep-generative-models-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/michal-jamroz-posterior-collapse-in-deep-generative-models-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Generative models are powerful Machine Learning models useful for\nextracting information from high-dimensional data, but they sometimes\nsuffer from a problem called \"posterior collapse\", which prevents them\nfrom learning representations of practical value. I am going to show\nwhy and when it happens, and also how to deal with it.\n\n**Why**\n\nDeep generative models like Variational AutoEncoders (VAEs) and\nGenerative Adversarial Networks (GANs) turned out to be very successful\nin real-world applications of machine learning, including: natural image\nmodelling, data compression, audio synthesis and many more.\nUnfortunately, it appears that models belonging to the VAE family - under\nsome conditions - may suffer from an undesired phenomenon called\n\"posterior collapse\" which causes them to learn poor data\nrepresentations. The talk's purpose is to present this problem and its\npractical implications.\n\n**What**\n\nThe presentation will comprise the following elements:\n\n- A short introduction to the basic Variational AutoEncoder model\n- Introducing the \"posterior collapse\" problem\n- How posterior collapse affects learning from data - natural image\n examples\n- Some research on dealing with posterior collapse\n\n**Audience**\n\nFamiliarity with the topic of generative modelling will be\nhelpful for anyone attending the talk, but it's not required. In fact,\nanyone with a basic understanding of neural networks, representation\nlearning and probability can gain useful information. The presentation won't\nbe overloaded with mathematical formulas; I will do my best to present\nmath-related aspects in an intuitive form.\n", "duration": 1669, "language": "eng", - "published_at": "2020-01-02T16:34:52.000Z", "recorded": "2019-12-12", "speakers": [ "Micha\u0142 Jamro\u017c" diff --git a/pydata-warsaw-2019/videos/michal-kierzynka-transfer-learning-for-image-recognition-in-healthcare-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/michal-kierzynka-transfer-learning-for-image-recognition-in-healthcare-pydata-warsaw-2019.json index a3955ca60..1a66096be 100644 --- a/pydata-warsaw-2019/videos/michal-kierzynka-transfer-learning-for-image-recognition-in-healthcare-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/michal-kierzynka-transfer-learning-for-image-recognition-in-healthcare-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Transfer learning is a powerful technique to boost the performance of a\ndeep learning model. However, the healthcare industry often has very\nspecific image data sets that are dissimilar to the large-scale data\nsets used to pretrain the publicly available models.
Therefore, are\nthere any benefits of applying transfer learning in healthcare? Come and\nlisten to find out.\n\nThe idea of transfer learning is to reuse features learned on a related\ntask to improve the performance of a model on a new task. The advantages\nof transfer learning are well known: faster training, less labeled data\nneeded and higher accuracy of the final model. Therefore, the use of\npretrained models became a de facto standard in many practical\napplications, among others in computer vision. This is all under the\nassumption that the features learned on the source task are generic\nenough to be reused on a target task. However, the healthcare industry\noften has very specific data sets that are rather dissimilar to the\nlarge-scale and publicly available data sets used to pretrain the\nmodels. The goal of the presentation is to show whether there are any\nadvantages of using pretrained models in such settings.\n\nTo find out, we have designed a dedicated experiment in which we compare\nthe performance of various CNN architectures applied to different\nmedical imaging data sets, both public and private. We initialize the\nmodels either randomly or with ImageNet pretrained parameters with\nvarious settings. We also compare the results to the performance of\nsmall, custom-designed CNN networks.\n\nThe presentation will have the following outline. First, the audience\nwill be introduced to transfer learning and its variants. Later, the\ndesign of the dedicated computational experiments will be presented,\nfollowed by the results and conclusions. The latter will be compared to\nthe conclusions from the latest state-of-the-art papers on the topic.\n\nThe participants will have a unique opportunity to learn about the\nbenefits and pitfalls of applying transfer learning to imaging data sets\nthat are much more specific than natural images from ImageNet.\n\nBackground knowledge required to understand the presentation:\nintermediate knowledge about machine learning, deep learning and CNNs.\n", "duration": 1788, "language": "eng", - "published_at": "2020-01-03T00:30:43.000Z", "recorded": "2019-12-12", "speakers": [ "Micha\u0142 Kierzynka" diff --git a/pydata-warsaw-2019/videos/michel-voss-detection-of-solar-panels-based-on-aerial-images-of-the-city-of-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/michel-voss-detection-of-solar-panels-based-on-aerial-images-of-the-city-of-pydata-warsaw-2019.json index 137d0c5e3..cf2d34f63 100644 --- a/pydata-warsaw-2019/videos/michel-voss-detection-of-solar-panels-based-on-aerial-images-of-the-city-of-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/michel-voss-detection-of-solar-panels-based-on-aerial-images-of-the-city-of-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "The main goal of the article is to present the results of a study on the\nuse of deep learning networks to detect solar panels based on aerial\nimages of Poznan. In addition, the main motivation is to obtain more\ndetailed information about the use of solar energy in Poland drawing on\nbig data sources, which until now have not been used for this purpose.\n\nThe data was acquired from the Management Board of Geodesy and Municipal\nCadastre GEOPOZ in Pozna\u0144 and included orthophotomaps for 2016 and the\nlayer of buildings and plots of land. We extracted buildings from the\nimages using R statistical software and the sf package.
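The core comparison in the healthcare transfer-learning abstract above - the same CNN initialised with ImageNet weights versus randomly - could be set up roughly like this Keras sketch; the architecture, input size and head are illustrative assumptions, not the authors' exact setup.

```python
import tensorflow as tf

def build_model(pretrained, num_classes=2):
    backbone = tf.keras.applications.ResNet50(
        weights="imagenet" if pretrained else None,  # the variable under test
        include_top=False,
        input_shape=(224, 224, 3),
        pooling="avg",
    )
    outputs = tf.keras.layers.Dense(num_classes, activation="softmax")(backbone.output)
    return tf.keras.Model(backbone.input, outputs)

pretrained_model = build_model(pretrained=True)    # ImageNet initialisation
random_init_model = build_model(pretrained=False)  # random initialisation
```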
To detect solar\npanels we used the Turi Create library, written in Python, which\nre-implements the YOLO (You Only Look Once) object detection model.\n\nThe object recognition algorithm was trained on a sample of images that\nincluded annotations (bounding boxes) about the exact location of solar\npanels. The results indicate a very high recognition efficiency at the\nlevel of 96-99% on the test sample. Based on this procedure we found\nthat around 2% of residential buildings in Pozna\u0144 in 2016 had solar\npanels mounted on roofs.\n\nAs far as we know, this is the first use of deep learning to detect\nsolar panels in Poland. Currently, similar studies are being carried out\nby, for instance, Statistics Netherlands as part of the DeepSolaris\nproject. The study exemplifies a trend involving the use of aerial and\nsatellite images for statistical purposes thanks to advanced machine\nlearning algorithms and open source software.\n", "duration": 1062, "language": "eng", - "published_at": "2020-01-03T08:00:08.000Z", "recorded": "2019-12-13", "speakers": [ "Michel Voss" diff --git a/pydata-warsaw-2019/videos/olszewski-otmianowski-how-to-efficiently-model-learners-knowledge-with-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/olszewski-otmianowski-how-to-efficiently-model-learners-knowledge-with-pydata-warsaw-2019.json index ad18f0eb8..79eaec228 100644 --- a/pydata-warsaw-2019/videos/olszewski-otmianowski-how-to-efficiently-model-learners-knowledge-with-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/olszewski-otmianowski-how-to-efficiently-model-learners-knowledge-with-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "During our presentation we will share the results and experiences\nconnected with implementing state-of-the-art techniques for modelling\nlearners' knowledge using Recurrent Neural Networks (Deep Knowledge\nTracing).\n\nKnowledge Tracing (KT) is one of the most important research areas in\npersonalized education nowadays. It allows us to trace learners\u2019\nknowledge over time so that we can accurately predict how they will\nperform in the future. By improving the quality of such models we can\nbetter adjust the adaptive learning experience to the needs of\nparticular students. In recent years the idea of using recurrent neural\nnetworks for learners' knowledge tracing (Deep Knowledge Tracing, DKT)\ngained a lot of attention, as it has been shown that it generally\noutperforms traditional methods. During our presentation we will share\nthe results and experiences connected with implementing this method in\none of the Pearson personalized learning products. We will focus on\nchallenges that we have encountered during the model development process\nrelated to the framework we\u2019ve used (TensorFlow), training performance,\nexperiment tracking and having multiple people working simultaneously on\nthe same model.
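For the solar-panel study above, training Turi Create's object detector follows a pattern roughly like this sketch; the file names and column names are assumptions, and the annotations must already be bounding boxes in Turi Create's expected format.

```python
import turicreate as tc

# Hypothetical SFrames with an image column and bounding-box annotations.
train = tc.SFrame("solar_panels_train.sframe")
test = tc.SFrame("solar_panels_test.sframe")

# Turi Create's object detector wraps a YOLO-style model under the hood.
model = tc.object_detector.create(train, feature="image", annotations="annotations")

print(model.evaluate(test))  # mAP-style metrics on the held-out sample
model.save("solar_panel_detector.model")
```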
We\u2019ll also share the results and compare them with the\nstate of the art results from other papers.\n", "duration": 1875, "language": "eng", - "published_at": "2020-01-03T08:00:08.000Z", "recorded": "2019-12-13", "speakers": [ "Mateusz Otmianowski", diff --git a/pydata-warsaw-2019/videos/pawel-cyrta-sound-modelling-parametric-methods-and-deep-learning-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/pawel-cyrta-sound-modelling-parametric-methods-and-deep-learning-pydata-warsaw-2019.json index 86ff8ed5f..9d15fb2b4 100644 --- a/pydata-warsaw-2019/videos/pawel-cyrta-sound-modelling-parametric-methods-and-deep-learning-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/pawel-cyrta-sound-modelling-parametric-methods-and-deep-learning-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Sound in digital form has for years been in a loop of steps: record, mix,\nedit, play back and listen. Some creative people created synthetic\nspeech and music with affordable tools. Now the available computation and data\nresources have made synthesis and sound shaping possible using\nparametric, physical methods or the latest deep learning\nrepresentation models. Discover the theory and code to do it.\n\nSound in digital form has for years been in a loop of steps: record, mix,\nedit, play back and listen. Some creative people created synthetic\nspeech and music with affordable tools. Now the available computation and data\nresources have made synthesis and sound shaping possible using\nparametric, physical methods or the latest deep learning\nrepresentation models. Discover the theory and code to do it.\n\nA whole set of basic audio\nprocessing functions - filtering, equalization, compression - is\nbecoming available to model with techniques like deep learning too. These\ntechniques can also extract a latent\nrepresentation and make it controllable to produce real sound. One can\ncreate a replica of a specific guitar amplifier's sound or try to\nfind parameters that produce unique audio for film or game sound\neffects.\n\nWe will go on a quest to find out how to model sound by exploring and\nrunning experiments with Python code. The experiment consists of four\nmain parts:\n\n1. Exploring how to get audio data and represent it using equations\n and recent deep learning models.\n2. Analysis by visualization and clustering.\n3. Mimicking real vibrations by approximation using parametric physical\n models.\n4. Grabbing DNN models to make them speak, play and sing.\n\nIn addition, I will describe the model used and present the code and the\nresults that produce audio from the model.\n\nIn summary, at the end of this talk you will have learned (I hope) how to\ndeal with sound signals and how they may be combined to create music or\njust bizarre, wacky sounds.\n", "duration": 1776, "language": "eng", - "published_at": "2020-01-03T00:31:29.000Z", "recorded": "2019-12-12", "speakers": [ "Pawel Cyrta" diff --git a/pydata-warsaw-2019/videos/pierre-gherman-geospatial-analysis-made-easy-with-postgis-and-geoalchemy-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/pierre-gherman-geospatial-analysis-made-easy-with-postgis-and-geoalchemy-pydata-warsaw-2019.json index e9a93cbb7..a7ee02051 100644 --- a/pydata-warsaw-2019/videos/pierre-gherman-geospatial-analysis-made-easy-with-postgis-and-geoalchemy-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/pierre-gherman-geospatial-analysis-made-easy-with-postgis-and-geoalchemy-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Using geospatial data is easier than ever.
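On the "parametric, physical" end of the spectrum the sound-modelling abstract above sketches, even a few lines of NumPy can approximate a plucked tone with exponentially decaying harmonics; every constant here is an invented illustration.

```python
import numpy as np

sr, dur, f0 = 44_100, 1.0, 220.0  # sample rate (Hz), seconds, fundamental (Hz)
t = np.linspace(0.0, dur, int(sr * dur), endpoint=False)

# Sum a handful of harmonics, each quieter and faster-decaying than the last.
tone = sum(
    (1.0 / k) * np.exp(-3.0 * k * t) * np.sin(2 * np.pi * k * f0 * t)
    for k in range(1, 8)
)
tone /= np.max(np.abs(tone))  # normalise to [-1, 1] before writing to a WAV file
```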
Data can easily be found on\nopen data portals or simply on Open Street Map. But how can we use this\ndata? What kind of tools are there to store and analyse this data in an\noptimal manner? During this talk we will answer these questions. We will\ndiscuss in particular PostGIS, an extension of the PostgreSQL database,\nand GeoAlchemy, a Python library.\n\nErik Brynjolfsson, a professor at MIT, once said that \"Every time we\ninvent something, we make it easier to invent something else\". That is\nprobably especially true in the digital space, given the pace at which\ntechnology is evolving. While working on a pricing model that is\nevaluating the price of apartments in Italy, based on information such\nas location, size or status, we realized that it would be very useful to\nhave more information about the area where the apartments are located.\nThis helped us discover that finding open geospatial data is not very\ndifficult these days, but processing and storing it require some\nparticular skills and tools. PostGIS is an open source extension of the\nPostgreSQL Database Management system that allows users to store\ngeospatial data as specific data types (geometries and geographies). In\naddition, it helps the user handle this data using some specific spatial\nfunctions such as distance, area, etc. SQLAlchemy is a Python SQL library\nimplementing the object-relational mapping concept. The advantage\nof this library is that developers no longer need to write SQL queries\nin their Python code when working with a database, but they can write\nPython classes instead that are translated to SQL statements. GeoAlchemy\nis an extension of SQLAlchemy that facilitates working with spatial\ndatabases, such as PostGIS. During this talk we will briefly talk about\nthe use-case that required the use of the tools mentioned above and walk\nyou through an example using data from Open Street Map, stored in geojson\nfiles. Our aim is to raise awareness in the audience about how\ngeospatial data can be manipulated and stored using Python and open\nsource database management systems. The talk is aimed at scientists and\ndevelopers interested in working with geospatial data using Python.\nThere are no advanced topics in the talk, but in order to get the most out of\nthe talk you should have some knowledge of Python and SQL.\n", "duration": 1775, "language": "eng", - "published_at": "2020-01-03T08:00:08.000Z", "recorded": "2019-12-12", "speakers": [ "Nicolas Pierre", diff --git a/pydata-warsaw-2019/videos/przemek-chrabka-how-to-structure-pyspark-application-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/przemek-chrabka-how-to-structure-pyspark-application-pydata-warsaw-2019.json index 9e5ea50be..185ec7166 100644 --- a/pydata-warsaw-2019/videos/przemek-chrabka-how-to-structure-pyspark-application-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/przemek-chrabka-how-to-structure-pyspark-application-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "A lot of Data Scientists and Engineers don\u2019t come from a Software\nEngineering background, and even if they have experience with writing\nSpark code they might lack knowledge about application structure\nprinciples. This talk is designed to help them write better and more\nreadable code.\n\nPySpark has become really popular over the last couple of years and is now a\ngo-to tool for building and managing data-heavy applications. One of the\nmost common ways Spark is used is moving some data around by writing\nETL/ELT jobs.
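A minimal sketch of the PostGIS-plus-GeoAlchemy combination described above: declare a geometry column as a Python class attribute and query it with spatial functions instead of hand-written SQL. The table, connection string and coordinates are invented, and it assumes SQLAlchemy 1.4+ with GeoAlchemy 2 against a PostGIS-enabled database.

```python
from geoalchemy2 import Geometry
from sqlalchemy import Column, Integer, String, create_engine, func
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Apartment(Base):
    __tablename__ = "apartments"
    id = Column(Integer, primary_key=True)
    address = Column(String)
    location = Column(Geometry("POINT", srid=4326))  # stored as a PostGIS geometry

engine = create_engine("postgresql:///realestate")  # hypothetical database

with Session(engine) as session:
    # Apartments near a point, via PostGIS's ST_DWithin translated from Python.
    point = func.ST_SetSRID(func.ST_MakePoint(16.93, 52.41), 4326)
    nearby = session.query(Apartment).filter(
        func.ST_DWithin(Apartment.location, point, 0.005)  # ~500 m in degrees here
    ).all()
```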
While doing that, your code should be manageable and\nunderstandable to others. In this talk I will try to introduce good\npractices for structuring a PySpark application and writing jobs, and also\nsome naming conventions.\n\nI will start this talk with an example of a bad way of writing a PySpark job,\nand during the course of the talk we will gradually improve it so that at the end\nour application is going to be production ready, easy to manage and\nshare with other developers.\n\nDuring this talk I will try to answer these questions: - How to structure a\nPySpark ETL application - How to write an ETL job - How to package your\ncode and dependencies - What are some coding and naming conventions\n", "duration": 1788, "language": "eng", - "published_at": "2020-01-03T08:00:08.000Z", "recorded": "2019-12-12", "speakers": [ "Przemek Chrabka" diff --git a/pydata-warsaw-2019/videos/pydata-warsaw-conference-2019-showreel.json b/pydata-warsaw-2019/videos/pydata-warsaw-conference-2019-showreel.json index 01bce0828..789b25525 100644 --- a/pydata-warsaw-2019/videos/pydata-warsaw-conference-2019-showreel.json +++ b/pydata-warsaw-2019/videos/pydata-warsaw-conference-2019-showreel.json @@ -2,7 +2,6 @@ "description": "www.pydata.org\r\n\r\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \r\n\r\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 163, "language": "eng", - "published_at": "2019-12-21T17:10:06.000Z", "recorded": "2019-12-13", "speakers": [ "Various speakers" diff --git a/pydata-warsaw-2019/videos/robert-kostrzewski-modern-machine-learning-flow-with-quilt-and-polyaxon-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/robert-kostrzewski-modern-machine-learning-flow-with-quilt-and-polyaxon-pydata-warsaw-2019.json index c7553c9d7..71ca66fdb 100644 --- a/pydata-warsaw-2019/videos/robert-kostrzewski-modern-machine-learning-flow-with-quilt-and-polyaxon-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/robert-kostrzewski-modern-machine-learning-flow-with-quilt-and-polyaxon-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Presentation of a development flow dedicated to Machine Learning internal\nand external projects including dataset storage and versioning via\nQuilt (as an alternative to DVC), experiments scheduling and\ninfrastructure maintenance via Polyaxon, continuous integration using\nCircleCI and configuration & deployment using Docker.\n\nThe talk is about presenting a development flow dedicated to Machine\nLearning internal and external projects, practiced in our company\n(Netguru). The flow includes the following parts:\n\n- Machine Learning dataset storage and versioning via Quilt (as an\n alternative to DVC)\n- Experiments scheduling and infrastructure maintenance via Polyaxon\n- Continuous Integration using CircleCI\n- Configuration and deployment using Docker.\n\nThe presentation describes all components and explains how to put them\ntogether.
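In the spirit of the structuring advice above, one common shape for a PySpark ETL job is small extract/transform/load functions wired together in a main(); the paths and columns below are invented for illustration, not from the talk.

```python
from pyspark.sql import DataFrame, SparkSession
from pyspark.sql import functions as F

def extract(spark: SparkSession, path: str) -> DataFrame:
    return spark.read.parquet(path)

def transform(df: DataFrame) -> DataFrame:
    # Business logic lives in pure DataFrame-in, DataFrame-out functions,
    # which keeps it easy to unit-test without touching storage.
    return df.filter(F.col("amount") > 0).withColumn(
        "amount_usd", F.col("amount") * F.col("fx_rate")
    )

def load(df: DataFrame, path: str) -> None:
    df.write.mode("overwrite").parquet(path)

def main() -> None:
    spark = SparkSession.builder.appName("orders_etl").getOrCreate()
    load(transform(extract(spark, "s3://raw/orders")), "s3://clean/orders")

if __name__ == "__main__":
    main()
```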
The given Machine Learning flow impacted our projects'\nsuccess stories. A further part of the talk will explain why it was so\ncrucial to build and maintain it.\n", "duration": 2060, "language": "eng", - "published_at": "2020-01-03T08:00:08.000Z", "recorded": "2019-12-12", "speakers": [ "Robert Kostrzewski" diff --git a/pydata-warsaw-2019/videos/tomasz-bartczak-radoslaw-bialobrzeski-learning-to-rank-with-the-transformer-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/tomasz-bartczak-radoslaw-bialobrzeski-learning-to-rank-with-the-transformer-pydata-warsaw-2019.json index a8f209d56..170642fe6 100644 --- a/pydata-warsaw-2019/videos/tomasz-bartczak-radoslaw-bialobrzeski-learning-to-rank-with-the-transformer-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/tomasz-bartczak-radoslaw-bialobrzeski-learning-to-rank-with-the-transformer-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Learning to Rank (LTR) is concerned with optimising the global ordering\nof a list of items, according to their utility to the users. In this\ntalk, we present the results of ongoing research at Allegro.pl into\napplying the Transformer architecture known from Neural Machine\nTranslation literature to the LTR setting and introduce allRank, an\nopen-source, Pytorch based framework for LTR.\n\nSelf-attention based architectures fuelled recent breakthroughs in many\nNLP tasks. Models like The Transformer, GPT-2 or BERT pushed the\nboundaries of what's possible in NLP and made headlines along the way.\nThe self-attention mechanism can be seen as an encoder for an unordered set\nof objects, taking into account interactions between items in the set.\nThis property makes the self-attention mechanism an attractive choice for\nLearning to Rank (LTR) models, which usually struggle with modelling\ninter-item dependencies.\n\nIn this talk, we present the results of ongoing research in applying\nself-attention based architectures to LTR. Our proposed model is a\nmodification of the popular Transformer architecture, adapted to the LTR\ntask. We guide the audience into both the setting of LTR and its most\npopular algorithms as well as the details of the self-attention mechanism and\nthe Transformer architecture. We present results on both proprietary\ndata of Allegro's clickthrough logs and the most popular LTR dataset,\nWEB30K. We demonstrate considerable performance gains of self-attention\nbased models over MLP baselines across popular pointwise, pairwise and\nlistwise losses. Finally, we present allRank, an open-source, Pytorch\nbased framework for neural ranking models.
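A toy PyTorch sketch of the central idea above - scoring every item in a list with a Transformer encoder so that each score can depend on the other items - not allRank's actual code; the dimensions are illustrative.

```python
import torch
from torch import nn

class SelfAttentionRanker(nn.Module):
    def __init__(self, n_features, d_model=64):
        super().__init__()
        self.project = nn.Linear(n_features, d_model)
        layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=4, batch_first=True)
        self.encoder = nn.TransformerEncoder(layer, num_layers=2)
        self.score = nn.Linear(d_model, 1)

    def forward(self, items):
        # items: (batch, list_size, n_features) -> one relevance score per item;
        # self-attention lets every item's score depend on the rest of the list.
        return self.score(self.encoder(self.project(items))).squeeze(-1)

scores = SelfAttentionRanker(n_features=10)(torch.randn(2, 20, 10))
print(scores.shape)  # torch.Size([2, 20])
```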
After the talk, the audience\nwill have a good understanding of the basics of LTR and its importance\nto the industry, and will see how to get started in training\nstate-of-the-art neural network models for learning to rank using\nallRank.\n", "duration": 1769, "language": "eng", - "published_at": "2020-01-02T16:35:46.000Z", "recorded": "2019-12-12", "speakers": [ "Tomasz Bartczak", diff --git a/pydata-warsaw-2019/videos/tomasz-dziopa-generative-text-modelling-scratching-the-surface-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/tomasz-dziopa-generative-text-modelling-scratching-the-surface-pydata-warsaw-2019.json index fc1d9fc18..b0e0ffc2e 100644 --- a/pydata-warsaw-2019/videos/tomasz-dziopa-generative-text-modelling-scratching-the-surface-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/tomasz-dziopa-generative-text-modelling-scratching-the-surface-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Recent progress in generating natural language text catches media\nattention. Are we about to get flooded by autogenerated fake news? Let's\nlearn about approaches to machine-generated text. Get a high level idea\nof how you can apply basic approaches like N-grams, HMMs as well as\nadvanced ones such as RNNs and VAEs. We'll apply those methods to\nreal-world datasets of Polish articles from Wikipedia.\n\nRecent progress in generating natural language text sparks controversy\nand catches global media attention. Are we about to get flooded by\nmachine-generated fake news? Are we on the edge of a completely new\nlevel of troll farms about to emerge? In this talk I will go over\napproaches to machine-generated text. You will get a high level idea of\nhow you can apply basic approaches like N-grams, Hidden Markov Models as\nwell as advanced ones such as RNNs and Variational Autoencoders. We will\ncover the main challenges like methods of evaluation, and potential use\ncases. We will also have fun applying the aforementioned methods to\nreal-world datasets of Polish articles from Wikipedia.\n", "duration": 1499, "language": "eng", - "published_at": "2020-01-03T00:31:43.000Z", "recorded": "2019-12-12", "speakers": [ "Tomasz Dziopa" diff --git a/pydata-warsaw-2019/videos/varun-kochar-automation-build-a-training-pipeline-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/varun-kochar-automation-build-a-training-pipeline-pydata-warsaw-2019.json index 4bd4f6ac6..f879eff4b 100644 --- a/pydata-warsaw-2019/videos/varun-kochar-automation-build-a-training-pipeline-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/varun-kochar-automation-build-a-training-pipeline-pydata-warsaw-2019.json @@ -1,7 +1,6 @@ { "description": "Many of us know how to train & deploy ML models in the cloud, but in doing so\nhave we become redundant? Running multiple experiments in a single machine\n& waiting for tasks to complete cannot be time-efficient for big\ndatasets. Hence, we need automation which can take over repetitive\nmanual tasks & spare us the time to do other important stuff.
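The simplest of the approaches listed in the text-generation abstract above is an N-gram model; a toy bigram sampler fits in a few lines (the corpus is a stand-in for the Polish Wikipedia data used in the talk).

```python
import random
from collections import defaultdict

corpus = "the cat sat on the mat and the dog sat on the cat".split()

# Record which words follow each word; sampling uniformly from this list is
# equivalent to sampling proportionally to bigram frequency.
successors = defaultdict(list)
for prev, nxt in zip(corpus, corpus[1:]):
    successors[prev].append(nxt)

word, generated = "the", ["the"]
for _ in range(10):
    word = random.choice(successors.get(word, corpus))  # back off to unigrams
    generated.append(word)
print(" ".join(generated))
```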
The aim is to\nshow how to deploy an ML architecture in 60 SECONDS.\n\nAn ML pipeline consists of many manual tasks such as data collection, data\ncleaning, training environment setup, training configuration, progress\nmonitoring or model evaluation; all these components should be\nautomated & what you should be left with is just a single CONFIGURATION\ndocument with information on different sets of experiments.\n", "duration": 1384, - "published_at": "2020-01-03T08:00:08.000Z", "recorded": "2019-12-12", "speakers": [ "Varun Kochar" diff --git a/pydata-warsaw-2019/videos/zuzanna-kunik-command-line-language-where-nlp-and-cyber-security-meets-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/zuzanna-kunik-command-line-language-where-nlp-and-cyber-security-meets-pydata-warsaw-2019.json index 507c278a7..0e805414d 100644 --- a/pydata-warsaw-2019/videos/zuzanna-kunik-command-line-language-where-nlp-and-cyber-security-meets-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/zuzanna-kunik-command-line-language-where-nlp-and-cyber-security-meets-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "In the cyber security world, analysis of command line logs is important\nfor breach detection, but it is one of the most challenging problems. To\nsimplify this process, we propose a framework called CMDLang \u2013 command\nline language, which has features of natural language. I will present\nresults of successful POS and NER training using popular NLP algorithms\nand demonstrate a real use case of CMDLang.\n\nAt F-Secure, in order to protect our customers, we use streams of\ncommand line logs coming from their systems to detect breaches and\nanomalies. Analysis of such data is one of the most challenging problems\nin the cyber security world. It requires domain knowledge and is hard to\nencapsulate in sets of rules.\n\nWhat if we treat command line logs as semi-structured text data? They\nfollow a set of grammar rules and have semantics. Therefore, we propose\nthe framework of CMDLang \u2013 command line language, which has features of\nnatural language. We performed successful training of part-of-speech\n(POS) tagger and named entity recognition (NER) models. Using CMDLang\nalong with NLP methods enables normalization of logs, parsing and their\ncategorization. With a defined language framework, we are able to\nanalyze huge streams of data faster, which improves our detection\ncapabilities.\n\nDuring the talk, I will present results of the CMDLang creation using\npopular, open-source NLP algorithms. I will give a walkthrough of the\nprocess and define the main ideas behind this language.
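One ingredient that CMDLang-style processing implies is normalising random-looking fragments of command lines into stable placeholder tokens before any POS/NER modelling; the rules below are invented examples, not F-Secure's actual ones.

```python
import re

RULES = [
    (re.compile(r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", re.I), "<GUID>"),
    (re.compile(r"\d+\.\d+(?:\.\d+)*"), "<VERSION>"),
    (re.compile(r"tmp\w+", re.I), "<TMPFILE>"),
]

def normalise(cmdline):
    # Apply every rewrite rule in order; each collapses one kind of randomness.
    for pattern, token in RULES:
        cmdline = pattern.sub(token, cmdline)
    return cmdline

print(normalise(r"setup-2.4.1.exe /log tmpA93F.txt"))
# -> setup-<VERSION>.exe /log <TMPFILE>.txt
```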
At the end I\nwill demonstrate usage of CMDLang in a real use case.\n\nThis talk will be interesting for every NLP enthusiast, as well as for\npeople working with (semi-)structured text data or log processing.\nDuring the presentation, I will explain any cyber security terminology\nused.\n", "duration": 1587, "language": "eng", - "published_at": "2020-01-03T02:09:49.000Z", "recorded": "2019-12-13", "speakers": [ "Zuzanna Kunik" diff --git a/pydata-warsaw-2019/videos/zygimantas-medelis-adding-narrative-to-bi-dashboards-with-natural-language-pydata-warsaw-2019.json b/pydata-warsaw-2019/videos/zygimantas-medelis-adding-narrative-to-bi-dashboards-with-natural-language-pydata-warsaw-2019.json index 188bf243a..68566f0bb 100644 --- a/pydata-warsaw-2019/videos/zygimantas-medelis-adding-narrative-to-bi-dashboards-with-natural-language-pydata-warsaw-2019.json +++ b/pydata-warsaw-2019/videos/zygimantas-medelis-adding-narrative-to-bi-dashboards-with-natural-language-pydata-warsaw-2019.json @@ -2,7 +2,6 @@ "description": "Data visualizations are not always sufficient for understanding important\naspects of data. People have to interpret charts and build narratives\nabout them. Natural language generation technology can be used to greatly\nimprove the communicative power of the data. In this talk I will\nintroduce NLG and will present a case through the use of two open source\ntools: Accelerated Text (NLG) and Metabase (BI).\n\nBI tools excel at visually organizing data in various dashboards. Yet\ndata visualizations are not always sufficient for understanding important\naspects of data. People have to interpret charts and build narratives\nabout data on their own. Furthermore, different groups of people often\nneed different narratives about the same data. For example, a story\nabout the same sales data is different for a CFO and for an Order\nFulfillment Manager.\n\nNatural language generation technology (NLG) can be used to greatly\nimprove the communicative power of BI dashboards. With the use of NLG\nwe can produce different descriptions of the same data adapted to the\nneeds of each person reading it.\n\nThe integration between BI and NLG will be demonstrated through the use\nof two open source products: (a) TokenMill's open source NLG tool called\nAccelerated Text (https://github.com/tokenmill/acceleratedtext), to\nillustrate how natural language can be generated using data; and (b) the\nBI platform Metabase (https://github.com/metabase/metabase), to provide\nanalytical dashboards which will include both data visualizations and\nautomatically generated natural language.\n", "duration": 1551, "language": "eng", - "published_at": "2020-01-03T02:09:49.000Z", "recorded": "2019-12-13", "speakers": [ "\u017dygimantas Medelis"