Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support exact search with quotes in the notebook #33

Merged
merged 3 commits into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 14 additions & 17 deletions edgar_tool/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,23 +132,20 @@ def text_search(
destination=output,
)
scraper = EdgarTextSearcher()
try:
scraper.text_search(
keywords=keywords,
entity_id=entity_id,
filing_form=TEXT_SEARCH_FILING_VS_MAPPING_CATEGORIES_MAPPING.get(filing_form),
single_forms=single_forms,
start_date=start_date,
end_date=end_date,
min_wait_seconds=min_wait,
max_wait_seconds=max_wait,
retries=retries,
destination=output,
peo_in=peo_in,
inc_in=inc_in
)
except NoResultsFoundError as e:
sys.exit(2)
scraper.text_search(
keywords=keywords,
entity_id=entity_id,
filing_form=TEXT_SEARCH_FILING_VS_MAPPING_CATEGORIES_MAPPING.get(filing_form),
single_forms=single_forms,
start_date=start_date,
end_date=end_date,
min_wait_seconds=min_wait,
max_wait_seconds=max_wait,
retries=retries,
destination=output,
peo_in=peo_in,
inc_in=inc_in
)

@staticmethod
def rss(
Expand Down
57 changes: 33 additions & 24 deletions notebook/Bellingcat_EDGAR_Tool.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
"from ipywidgets import widgets\n",
"from IPython.display import display\n",
"from IPython import get_ipython\n",
"from contextlib import redirect_stdout\n",
"\n",
"data_table.enable_dataframe_formatter()\n",
"\n",
Expand All @@ -74,33 +75,41 @@
"\n",
"# Install the EDGAR search tool on the first run\n",
"![ ! -f \"edgar_tool_installed\" ] && echo -n \"Loading the EDGAR Tool on first search...\" && pip install edgar-tool >> {logfile} 2>&1 && pip install pandas==1.5.3 >> {logfile} 2>&1 && touch edgar_tool_installed && echo \"Loaded.\"\n",
"from edgar_tool.cli import SecEdgarScraperCli as edgar_tool\n",
"from edgar_tool.page_fetcher import NoResultsFoundError\n",
"\n",
"# Run the tool with the query\n",
"!echo -n \"Searching EDGAR...\"\n",
"!edgar-tool text_search {search_keywords} --start_date {start_date} --end_date {end_date} --filing_form {filing_type} --entity_id {company_cik} --output {output} --browser firefox --min-wait 0.5 --max-wait 1.5 {loc_filter} >> {logfile} 2>&1\n",
"exit_code = get_ipython().__dict__['user_ns']['_exit_code']\n",
"!echo \"Done.\"\n",
"\n",
"# Error handling\n",
"if exit_code == 2:\n",
"print(\"Searching EDGAR...\")\n",
"try:\n",
" with open(logfile, 'a') as f:\n",
" with redirect_stdout(f):\n",
" edgar_tool.text_search(\n",
" search_keywords,\n",
" start_date=start_date, \n",
" end_date=end_date,\n",
" filing_form=filing_type,\n",
" entity_id=company_cik,\n",
" output=output,\n",
" peo_in=location if filter_by_location==\"Principal executive offices in\" else None,\n",
" inc_in=location if filter_by_location==\"Incorporated in\" else None,\n",
" )\n",
" print(\"Done.\")\n",
" # Load results\n",
" results = pd.read_csv(output)\n",
"\n",
" # Show download button\n",
" btn = widgets.Button(description='Download Results')\n",
" btn.on_click(lambda x: files.download(output))\n",
" display(btn)\n",
"\n",
" # Display the results in a data table\n",
" display(results)\n",
"except NoResultsFoundError:\n",
" print(\"\\x1b[33m No results were found for your query.\\x1b[0m\")\n",
"elif exit_code != 0:\n",
" print(\"\\x1b[31m Something went wrong with the EDGAR tool, check your search and try again.\\x1b[0m\")\n",
"else:\n",
" try:\n",
" # Load results\n",
" results = pd.read_csv(output)\n",
"\n",
" # Show download button\n",
" btn = widgets.Button(description='Download Results')\n",
" btn.on_click(lambda x: files.download(output))\n",
" display(btn)\n",
"\n",
" # Display the results in a data table\n",
" display(results)\n",
" except FileNotFoundError as e:\n",
" print(\"\\x1b[31m Something went wrong with the EDGAR tool, please get in touch at [email protected] and help us improve the tool for everyone. \\x1b[0m\")\n",
"\n"
"except FileNotFoundError as e:\n",
" print(\"\\x1b[31m Something went wrong with the EDGAR tool, please get in touch at [email protected] and help us improve the tool for everyone. \\x1b[0m\")\n",
"except Exception as e:\n",
" print(\"\\x1b[31m Something went wrong with the EDGAR tool, check your search and try again.\\x1b[0m\") "
]
}
],
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "edgar-tool"
version = "1.3.1"
version = "1.3.2"
description = "Search and retrieve corporate and financial data from the United States Securities and Exchange Commission (SEC)."
authors = ["Bellingcat"]
license = "GNU General Public License v3 (GPLv3)"
Expand Down