Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 20 additions & 19 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -1,43 +1,44 @@
# This is a basic workflow to help you get started with Actions

name: CI

# Controls when the workflow will run
on:
# Triggers the workflow on push or pull request events but only for the master branch
push:
branches: [ master ]
pull_request:
branches: [ master ]

# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
# This workflow contains a single job called "build"
build:
# The type of runner that the job will run on
runs-on: ubuntu-latest

# Steps represent a sequence of tasks that will be executed as part of the job
steps:
# Checks out your repository under $GITHUB_WORKSPACE so your job can access it
- name: Check out repository
uses: actions/checkout@v2

- name: Checkout
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"

# Runs a single command using the runner's shell
- name: Install package with test dependencies
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e ".[tests]"
pip install -e ".[tests,docs]"

# Runs a set of commands using the runner's shell
- name: Run unit tests
run: pytest tests/unit/

- name: Install hatch
run: pip install hatch

- name: Build documentation (PR only)
if: github.event_name == 'pull_request'
run: |
hatch run docs:build

- name: Show Sphinx error log (if any)
if: failure() || always()
run: |
pytest tests/unit/
echo "---- sphinx error logs ----"
ls -la /tmp | grep sphinx || true
cat /tmp/sphinx-err-* || true
23 changes: 22 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
*TODO: The badges above, which show the Python version and package version, only work if your package is published on PyPI.
If you don't plan to publish to PyPI, you can remove them.*

Overview
## Overview

AutoEDA is a lightweight Python package designed to automate the most common and time-consuming steps of Exploratory Data Analysis (EDA). Given a pandas DataFrame, AutoEDA quickly surfaces data quality issues, statistical summaries, and meaningful visualizations to help data scientists and analysts understand their data before modeling.

Expand Down Expand Up @@ -74,6 +74,27 @@ AutoEDA sits in the space between low-level EDA utilities and fully automated pr
- Tight integration with pandas and matplotlib/seaborn
- A focus on EDA as code, suitable for notebooks, scripts, and production pipelines

## Development Setup

Create the environment.

```bash
conda env create -f environment.yml
conda activate autoeda-dev
```

Install the package in editable mode.

```bash
pip install -e .
```

Run the tests.

```bash
pytest
```

## Contributors

- Eli Gonzalez
Expand Down
19 changes: 5 additions & 14 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
@@ -1,42 +1,33 @@
project:
type: website

# tell quarto to read the generated sidebar
metadata-files:
- reference/_sidebar.yml

# tell quarto to read the generated styles
format:
html:
css:
- reference/_styles-quartodoc.css

quartodoc:
# the name used to import the package you want to create reference docs for
package: autoeda
# write sidebar and style data
sidebar: reference/_sidebar.yml
css: reference/_styles-quartodoc.css

sections:
- title: Summary statistics
desc: Functions to perform automated exploratory data analysis.
package: autoeda.get_summary_df
# list of functions/classes/modules to document.
package: autoeda
contents:
# the functions being documented in the package.
# you can refer to anything: class methods, modules, etc.
- get_summary_df

- title: Data inspection
desc: Functions to inspect data for common issues.
package: autoeda.inspect
package: autoeda.inspect
contents:
# the functions being documented in the package.
# you can refer to anything: class methods, modules, etc.
- get_unary
- get_high_cardinality

- title: Missing data and outliers
desc: Function to check amount of missing data in a DataFrame.
package: autoeda.check_na_outlier
Expand All @@ -48,4 +39,4 @@ quartodoc:
package: autoeda.plot
contents:
- plot_correlation_heatmap
- plot_histograms_by_target
- plot_histograms_by_target
99 changes: 60 additions & 39 deletions docs/_site/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>

<meta charset="utf-8">
<meta name="generator" content="quarto-1.8.27">
<meta name="generator" content="quarto-1.7.33">

<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">

<meta name="description" content="Automated EDA tools for fast and clear data exploration.">

<title>index</title>
<title>AutoEDA Documentation</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
Expand Down Expand Up @@ -67,15 +68,14 @@
<meta name="quarto:offset" content="./">
<script src="site_libs/quarto-html/quarto.js" type="module"></script>
<script src="site_libs/quarto-html/tabsets/tabsets.js" type="module"></script>
<script src="site_libs/quarto-html/axe/axe-check.js" type="module"></script>
<script src="site_libs/quarto-html/popper.min.js"></script>
<script src="site_libs/quarto-html/tippy.umd.min.js"></script>
<script src="site_libs/quarto-html/anchor.min.js"></script>
<link href="site_libs/quarto-html/tippy.css" rel="stylesheet">
<link href="site_libs/quarto-html/quarto-syntax-highlighting-ed96de9b727972fe78a7b5d16c58bf87.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<link href="site_libs/quarto-html/quarto-syntax-highlighting-ea385d0e468b0dd5ea5bf0780b1290d9.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="site_libs/bootstrap/bootstrap.min.js"></script>
<link href="site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="site_libs/bootstrap/bootstrap-ff63373b1067ca6f91cf1456aa1f00a2.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="light">
<link href="site_libs/bootstrap/bootstrap-a14e3238c51140e99ccc48519b6ed9ce.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="light">
<script id="quarto-search-options" type="application/json">{
"location": "sidebar",
"copy-button": false,
Expand Down Expand Up @@ -117,39 +117,61 @@
<!-- margin-sidebar -->

<!-- main -->
<main class="content" id="quarto-document-content"><header id="title-block-header" class="quarto-title-block"></header>
<main class="content" id="quarto-document-content">

<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">AutoEDA Documentation</h1>
</div>

<div>
<div class="description">
Automated EDA tools for fast and clear data exploration.
</div>
</div>


<section id="welcome-to-autoedas-documentation" class="level1">
<h1>Welcome to AutoEDA’s Documentation ’</h1>
<section id="overview" class="level2">
<h2 class="anchored" data-anchor-id="overview">Overview</h2>
<div class="{toctree}">
<p>:maxdepth: 2 :hidden: :caption: Contents:</p>
<p>Home <self></self></p>
</div>
<p>This is the landing page of your docs. you can update it as you’d like to. This documentation example uses myst markdown as the primary documentation syntax.</p>
<p>:::{button-link} <a href="https://www.pyopensci.org/python-package-guide/documentation/hosting-tools/myst-markdown-rst-doc-syntax.html" class="uri">https://www.pyopensci.org/python-package-guide/documentation/hosting-tools/myst-markdown-rst-doc-syntax.html</a> :color: primary :class: sd-rounded-pill float-left</p>
<p>Learn more about myst in our pyOpenSci packaging guide.</p>
<p>:::</p>
<p>Myst is a version of markdown that has more formatting flexibility. This is what a sphinx directive looks like using myst markdown formatting:</p>
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb1"><pre class="sourceCode markdown code-with-copy"><code class="sourceCode markdown"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a>:::{toctree}</span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="sc">:maxdepth:</span> 2</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="sc">:caption:</span> Contents:</span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a>:::</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
<p>If you see syntax like the syntax below, you are looking at rst.</p>
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb2"><pre class="sourceCode rst code-with-copy"><code class="sourceCode rest"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="dt">.. toctree::</span></span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">:maxdepth:</span> 2</span>
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">:caption:</span> Contents:</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
</section>
<section id="copyright" class="level2">
<h2 class="anchored" data-anchor-id="copyright">Copyright</h2>
<div class="quarto-title-meta">




</div>



</header>


<section id="welcome-to-autoeda" class="level2">
<h2 class="anchored" data-anchor-id="welcome-to-autoeda">Welcome to AutoEDA</h2>
<p>AutoEDA is a lightweight Python package designed to accelerate exploratory data analysis (EDA) by generating summary statistics, detecting common data issues, and creating useful visualizations.</p>
<hr>
<section id="quick-links" class="level3">
<h3 class="anchored" data-anchor-id="quick-links">📌 Quick Links</h3>
<ul>
<li>Copyright © 2026 Eli Gonzalez, Gurleen Kaur, Gloria Yi, Mantram Sharma.</li>
<li>Free software distributed under the MIT License.</li>
<li><a href="reference/index.html">API Reference</a></li>
<li><a href="#installation">Installation &amp; Setup</a></li>
<li><a href="#usage-example">Usage Examples</a></li>
<li><a href="#copyright">Copyright</a></li>
</ul>
<hr>
</section>
<section id="installation" class="level3">
<h3 class="anchored" data-anchor-id="installation">Installation</h3>
<div class="sourceCode" id="cb1"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="ex">pip</span> install <span class="at">-e</span> .</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="usage-example" class="level3">
<h3 class="anchored" data-anchor-id="usage-example">Usage Example</h3>
<div class="sourceCode" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> autoeda <span class="im">import</span> get_summary_df</span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a>summary <span class="op">=</span> get_summary_df(df, target<span class="op">=</span><span class="st">"outcome"</span>)</span>
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(summary.head())</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="copyright" class="level3">
<h3 class="anchored" data-anchor-id="copyright">Copyright</h3>
<p>Copyright © 2026 Eli Gonzalez, Gurleen Kaur, Gloria Yi, Mantram Sharma.</p>
<p>Free software distributed under the MIT License.</p>


</section>
Expand Down Expand Up @@ -207,14 +229,13 @@ <h2 class="anchored" data-anchor-id="copyright">Copyright</h2>
e.clearSelection();
}
const getTextToCopy = function(trigger) {
const outerScaffold = trigger.parentElement.cloneNode(true);
const codeEl = outerScaffold.querySelector('code');
for (const childEl of codeEl.children) {
if (isCodeAnnotation(childEl)) {
childEl.remove();
const codeEl = trigger.previousElementSibling.cloneNode(true);
for (const childEl of codeEl.children) {
if (isCodeAnnotation(childEl)) {
childEl.remove();
}
}
}
return codeEl.innerText;
return codeEl.innerText;
}
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
text: getTextToCopy
Expand Down
Loading
Loading