mirror of
https://github.com/aljazceru/IngestRSS.git
synced 2025-12-17 05:54:22 +01:00
update
This commit is contained in:
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
227
monthly_ingestion.ipynb
Normal file
227
monthly_ingestion.ipynb
Normal file
@@ -0,0 +1,227 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# RSS Article Batch Download Examples\n",
|
||||||
|
"\n",
|
||||||
|
"This notebook demonstrates how to batch download RSS articles from S3."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from time import time\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Root imports\n",
|
||||||
|
"\n",
|
||||||
|
"from src.search.batch import S3BatchDownloader\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"\n",
|
||||||
|
"load_dotenv(override=True)\n",
|
||||||
|
"\n",
|
||||||
|
"downloader = S3BatchDownloader()\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize the Downloader"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"List permission: Allowed\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import boto3\n",
|
||||||
|
"from botocore.exceptions import ClientError\n",
|
||||||
|
"\n",
|
||||||
|
"s3 = boto3.client('s3')\n",
|
||||||
|
"\n",
|
||||||
|
"# Test ListObjects\n",
|
||||||
|
"try:\n",
|
||||||
|
" response = s3.list_objects_v2(Bucket=os.getenv(\"S3_BUCKET_NAME\"))\n",
|
||||||
|
" print(\"List permission: Allowed\")\n",
|
||||||
|
"except ClientError as e:\n",
|
||||||
|
" print(\"List permission: Denied\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Found 59592 objects to process\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" 53%|█████▎ | 31707/59592 [04:33<6:08:33, 1.26object/s]IOStream.flush timed out\n",
|
||||||
|
"100%|██████████| 59592/59592 [08:38<00:00, 114.83object/s] \n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Downloaded data to November-1.csv in 605.81 seconds\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"start = time()\n",
|
||||||
|
"output_path = \"November-1.csv\" # or \"consolidated_data.json\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Define date range\n",
|
||||||
|
"start_date = \"2024-11-17\" # FIXME: Fix the error where data can't be collected before the date you started collecting.\n",
|
||||||
|
"end_date = \"2024-11-22\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Start downloading\n",
|
||||||
|
"downloader.download_to_file(\n",
|
||||||
|
" output_path=output_path,\n",
|
||||||
|
" file_format=\"csv\", # or \"json\"\n",
|
||||||
|
" start_date=start_date,\n",
|
||||||
|
" end_date=end_date\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"print(f\"Downloaded data to {output_path} in {time() - start:.2f} seconds\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Found 50037 objects to process\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" 66%|██████▌ | 33067/50037 [04:07<11:49, 23.92object/s] IOStream.flush timed out\n",
|
||||||
|
"100%|██████████| 50037/50037 [07:11<00:00, 116.06object/s] \n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Downloaded data to November-2.csv in 539.74 seconds\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"start = time()\n",
|
||||||
|
"output_path = \"November-2.csv\" # or \"consolidated_data.json\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Define date range\n",
|
||||||
|
"start_date = \"2024-11-23\" # FIXME: Fix the error where data can't be collected before the date you started collecting.\n",
|
||||||
|
"end_date = \"2024-11-27\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Start downloading\n",
|
||||||
|
"downloader.download_to_file(\n",
|
||||||
|
" output_path=output_path,\n",
|
||||||
|
" file_format=\"csv\", # or \"json\"\n",
|
||||||
|
" start_date=start_date,\n",
|
||||||
|
" end_date=end_date\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"print(f\"Downloaded data to {output_path} in {time() - start:.2f} seconds\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"start = time()\n",
|
||||||
|
"output_path = \"November-3.csv\" # or \"consolidated_data.json\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Define date range\n",
|
||||||
|
"start_date = \"2024-11-8\" # FIXME: Fix the error where data can't be collected before the date you started collecting.\n",
|
||||||
|
"end_date = \"2024-11-30\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Start downloading\n",
|
||||||
|
"downloader.download_to_file(\n",
|
||||||
|
" output_path=output_path,\n",
|
||||||
|
" file_format=\"csv\", # or \"json\"\n",
|
||||||
|
" start_date=start_date,\n",
|
||||||
|
" end_date=end_date\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"print(f\"Downloaded data to {output_path} in {time() - start:.2f} seconds\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Aggregating"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "base",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -5,3 +5,4 @@ constructs==10.2.69
|
|||||||
# Optional, yet necessary for the Pinecone SDK functionality.
|
# Optional, yet necessary for the Pinecone SDK functionality.
|
||||||
pinecone
|
pinecone
|
||||||
openai
|
openai
|
||||||
|
tqdm
|
||||||
905
rss_feeds.json
905
rss_feeds.json
@@ -93,5 +93,910 @@
|
|||||||
"u": "https://www.aspireiq.com/blog/rss.xml",
|
"u": "https://www.aspireiq.com/blog/rss.xml",
|
||||||
"dt": 1699830841,
|
"dt": 1699830841,
|
||||||
"update": 1
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://theinfluencermarketingfactory.com/blogs/influencer-marketing-tips.atom",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.forbes.com/personal-brands/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.forbes.com/event-hosting/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://cmxhub.com/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.thepodcasthost.com/feed/podcast/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://podcastmovementsessions.libsyn.com/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://podcraft.libsyn.com/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "http://feeds.feedburner.com/streamingmedia/TV",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://livestream.com/blog/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.ecommercetimes.com/perl/syndication/rssfull.pl",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.causevox.com/blog/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://hbr.org/rss/harvard-business-review",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.fastcompany.com/feed/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.thedrum.com/rss/news.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://crisis-response.com/Articles/index.php/rss-feeds",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://prcg.com/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.odwyerpr.com/rss-feed.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.prweek.com/uk/rss/news",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "http://feeds.feedburner.com/clickboothblog",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://feeds.feedburner.com/avantlinkblog",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.slickdeals.net/feeds/affiliate/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://neoreach.com/blog/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://onalytica.com/blog/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.braginskyassociates.com/blog-feed.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.sociallyawareblog.com/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.mediapost.com/rss/news/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.nikonusa.com/en/learn-and-explore/index.page",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://500px.com/blog/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.modernretail.co.uk/category/industry/influencer-marketing/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.six-spoke.com/news-feed.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://izea.com/blogs/feeds/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.socialchain.com/blog/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.lumanu.com/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://later.com/blog/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.hopperhq.com/blog/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://fohr.co/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.law360.com/rss/SCOTUS/all",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://feeds.washingtonpost.com/rss/politics/supreme-court",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://supreme.justia.com/supreme-court-rss/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.law.cornell.edu/supct/rss/supct_recent.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://law.justia.com/feeds/categories/criminal-law.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.abajournal.com/topics/criminal-justice-law/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.lexology.com/feed/all/Civil%20Procedure",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://law.justia.com/feeds/categories/civil-law.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.law360.com/rss/civil/articles",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.abajournal.com/topics/civil-procedure/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://abovethelaw.com/category/litigation/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.jdsupra.com/rss/feed?tags=civil-litigation",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://law.justia.com/feeds/categories/corporate-law.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.abajournal.com/topics/corporate-law/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.law360.com/rss/corporate/articles",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.jdsupra.com/rss/feed?tags=corporate-law",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://feeds.feedburner.com/FindLawCorporateCounselNews",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://law.justia.com/feeds/categories/intellectual-property-law.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.abajournal.com/topics/intellectual+property+law/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.ipprotheinternet.com/RSS",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.jamsadr.com/rss.aspx",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.cpradr.org/news-publications/articles/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.mediation.com/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.cedr.com/newsandevents/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://healthpolicy.usc.edu/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.fda.gov/about-fda/rss-feeds-fda",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.aha.org/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.beckershospitalreview.com/feeds/rss-articles.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.securityweek.com/rss-feeds",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.cnet.com/rss/news/security/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.law.ucla.edu/centers/social-policy/education-policy/news-and-events/rss-feeds",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.brookings.edu/feed/?type=content&cat=167",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.chalkbeat.org/feeds/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.insidehighered.com/rss/news",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://opil.ouplaw.com/search-result?siteToSearch=OCO&searchType=advanced&pageSize=10&sort=relevance&pageSize=10&sort=date-desc&q=*&fq=contentType:BookItem+OR+contentType:ChapterItem+OR+contentType:JournalItem+OR+contentType:ReferenceItem&fq=subject_Law:Constitutional+and+administrative+law&fl=title%2Cabstract%2CbodyText%2CbookTitle%2CjournalTitle%2Cauthor%2CpublisherName%2CpublicationDate%2CpublicationDateDisplay%2Cisbn%2Cdoi%2Cid%2Csubjects%2CcontentTypes%2CpeerReviewed%2CopenAccess%2Curl%2CthumbnailUrl%2CpdfUrl%2CxmlUrl%2ChtmlUrl%2CdocumentType%2Clanguage%2CaccessProviderName%2CaccessProviderDisplay&start=0",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.yalejreg.com/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.abajournal.com/topic/legal+ethics/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://georgetownlawjournal.org/feeds/journal-legal-ethics",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.lexology.com/gatewayrss/22/Feed.aspx?g=9f7d415b-070a-4c47-a086-c8b7dbb08d06",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.americanbar.org/content/dam/aba/publications/professional_lawyer/ProfLawyer_Journal_Fall_2019.rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://hq.ssrn.com/jourInvite.cfm?link=Legal-Writing-eJournal",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://writingcenterblog.unc.edu/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.legalbluebook.com/blog/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.scribes.org/legal-writing-news?format=RSS",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://reason.com/volokh-conspiracy/feed/?cat=legal-writing",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.uscourts.gov/news/rss-feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.brennancenter.org/topics/judicial-selection/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.acslaw.org/our-work/resources/feed/?category=judicial-nominations",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.pewresearch.org/topics/u-s-politics-policy/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.abajournal.com/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.courtlistener.com/docket/rss/ctapp/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.law.upenn.edu/live/rss/news.php",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.americanbar.org/groups/legal_education/resources/legal-education-news-feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://taxprof.typepad.com/taxprof_blog/legal_education/index.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.americanbar.org/groups/public_services/lawyer-referral/public-interest-connections/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.law.com/rss/nlj/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.law360.com/rss/legalindustry",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.washingtonpost.com/news/volokh-conspiracy/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.brennancenter.org/topics/public-interest-law/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.law.com/rss/technology/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.abajournal.com/topic/law+firm+management/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.lawtechnologytoday.org/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.abajournal.com/topic/pro_bono/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.legalaidnyc.org/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.floridabar.org/publications/news/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://immigrantjustice.org/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.americanbar.org/groups/legal_services/publications/solo_small_firm_resources/solo-in-a-box-rss/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.nlada.org/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.probono.net/news/rss/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.nalp.org/rss/news.rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.nita.org/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "http://www.federalevidence.com/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://legaltalknetwork.com/feed/podcast/in-the-mind-of-a-trial-lawyer/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.courtroomview.com/category/news/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.lawfareblog.com/topic/international-law/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.ibanet.org/RSS/IBA-RSS-News.aspx",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.asil.org/resources/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.icc-cpi.int/NewsFeed.aspx",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.law.georgetown.edu/international-law-journal/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.ejil.org/rss_feeds.html",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.bloomberg.com/businessweekrss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.economist.com/sections/business-finance/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.industryweek.com/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.scdigest.com/rssfeeds.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.healthcarefinancenews.com/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.ehstoday.com/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.bloomberg.com/markets/sitemap.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.fool.com/feeds/all.rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.bea.gov/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.worldbank.org/en/news/all/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://unece.org/info/news/news/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.census.gov/economic-indicators/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://venturebeat.com/category/startups/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://hbr.org/rss/topic/entrepreneurship",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://moz.com/blog/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://salesloft.com/resources/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.bdc.ca/en/articles-tools/entrepreneur-toolkit/business-strategy-planning/marketing/pages/default.aspx?rss=true",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.crmbuyer.com/perl/syndication/rssfull.pl",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.copper.com/blog/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://blog.hubspot.com/crm/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "http://www.scdigest.com/rssfeeds.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.manufacturing.net/rss/all",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.industryweek.com/rss/allarticles.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.hrmagazine.co.uk/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://hbr.org/feeds/topics/organizational-culture",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://cultureiq.com/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://officevibe.com/blog/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://hbr.org/feeds/topics/leadership",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.mckinsey.com/rss/insights/leadership",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.strategy-business.com/rss/stratcon_all",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.gsb.stanford.edu/sites/default/files/feeds/cgri.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.navexglobal.com/en-us/company/blog/ethicscompliance/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.accountingtoday.com/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.journalofaccountancy.com/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.wsj.com/news/cfo-journal/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.bloomberg.com/professional/accounting-news/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.fm-magazine.com/news/rss.html",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.accountingweb.com/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.risk.net/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.sec.gov/rss/pressrel.rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.riskmanagementmonitor.com/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.isaca.org/resources/news-and-trends/isaca-now-blog/risk-management",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.mergermarket.com/rss/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.bloomberg.com/professional/ma-news/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://pitchbook.com/news/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.wsj.com/news/business/deals",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.forbes.com/mergers-acquisitions/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.accountingtoday.com/tag/mergers-acquisitions",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.crowdfundinsider.com/category/equity-crowdfunding/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.crowdfundinsider.com/category/p2p-lending/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.fundable.com/blog/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.startengine.com/blog/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.bloomberg.com/topics/international-trade/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.wto.org/rss/news_rss_e.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://unctad.org/news/rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.forbes.com/international-business/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.sba.gov/rss/news.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://quickbooks.intuit.com/r/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.score.org/blog/feed",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.americanbar.org/content/dam/aba/rss/news/abanews.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://www.natlawreview.com/rss-feed.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://rss.nytimes.com/services/xml/rss/nyt/BusinessLaw.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://angel.co/blog.rss",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://hbr.org/technology/rss.xml",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"u": "https://sloanreview.mit.edu/topics/innovation/feed/",
|
||||||
|
"dt": 1699830841,
|
||||||
|
"update": 0
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -4,6 +4,10 @@ import sys
|
|||||||
import json
|
import json
|
||||||
from src.utils.retry_logic import retry_with_backoff
|
from src.utils.retry_logic import retry_with_backoff
|
||||||
from botocore.exceptions import ClientError
|
from botocore.exceptions import ClientError
|
||||||
|
from pinecone import Pinecone
|
||||||
|
from pinecone import ServerlessSpec
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -190,5 +194,23 @@ def deploy_infrastructure():
|
|||||||
}
|
}
|
||||||
])
|
])
|
||||||
|
|
||||||
|
if os.getenv("STORAGE_STRATEGY") == 'pinecone':
|
||||||
|
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
|
||||||
|
index_name = os.getenv("PINECONE_DB_NAME")
|
||||||
|
embedding_dim = os.getenv("VECTOR_EMBEDDING_DIM")
|
||||||
|
vector_search_metric = os.getenv("VECTOR_SEARCH_METRIC")
|
||||||
|
|
||||||
|
if index_name not in pc.list_indexes().names():
|
||||||
|
pc.create_index(
|
||||||
|
name=index_name,
|
||||||
|
dimension=int(embedding_dim),
|
||||||
|
metric=vector_search_metric,
|
||||||
|
spec = ServerlessSpec(
|
||||||
|
cloud="aws",
|
||||||
|
region=os.getenv("AWS_REGION"),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
deploy_infrastructure()
|
deploy_infrastructure()
|
||||||
@@ -1,13 +1,14 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from pinecone import Pinecone
|
from pinecone import Pinecone
|
||||||
|
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
|
|
||||||
# Set up Pinecone client
|
# Set up Pinecone client
|
||||||
api_key = os.getenv("PINCEONE_API_KEY")
|
api_key = os.getenv("PINCEONE_API_KEY")
|
||||||
shards = os.getenv("PINECONE_SHARDS")
|
shards = os.getenv("PINECONE_SHARDS")
|
||||||
embedding_model = os.getenv("VECTOR_EMBEDDING_MODEL")
|
embedding_model = os.getenv("VECTOR_EMBEDDING_MODEL")
|
||||||
embedding_dim = int(os.getenv("VECTOR_EMBEDDING_DIM"))
|
embedding_dim = os.getenv("VECTOR_EMBEDDING_DIM")
|
||||||
vector_search_metric = os.getenv("VECTOR_SEARCH_METRIC")
|
vector_search_metric = os.getenv("VECTOR_SEARCH_METRIC")
|
||||||
index_name = os.getenv("PINECONE_DB_NAME")
|
index_name = os.getenv("PINECONE_DB_NAME")
|
||||||
|
|
||||||
@@ -16,12 +17,7 @@ pc = Pinecone(api_key=api_key)
|
|||||||
|
|
||||||
def get_index():
|
def get_index():
|
||||||
if index_name not in pc.list_indexes().names():
|
if index_name not in pc.list_indexes().names():
|
||||||
pc.create_index(
|
return KeyError(f"Index {index_name} not found")
|
||||||
name=index_name,
|
|
||||||
dimension=embedding_dim,
|
|
||||||
metric=vector_search_metric,
|
|
||||||
shards=shards
|
|
||||||
)
|
|
||||||
|
|
||||||
index = pc.Index(index_name)
|
index = pc.Index(index_name)
|
||||||
return index
|
return index
|
||||||
@@ -29,7 +25,7 @@ def get_index():
|
|||||||
def vectorize(article:str) -> list[float]:
|
def vectorize(article:str) -> list[float]:
|
||||||
response = client.embeddings.create(
|
response = client.embeddings.create(
|
||||||
input=article,
|
input=article,
|
||||||
model=os.getenv('OPENAI_EMBEDDING_MODEL')
|
model=os.getenv('OPENAI_EMBEDDING_MODEL', 'text')
|
||||||
)
|
)
|
||||||
|
|
||||||
return response.data[0].embedding
|
return response.data[0].embedding
|
||||||
|
|||||||
@@ -34,7 +34,6 @@ def pinecone_save_article(article:dict):
|
|||||||
article["id"] = article["article_id"]
|
article["id"] = article["article_id"]
|
||||||
article["values"] = vectorize(article["content"])
|
article["values"] = vectorize(article["content"])
|
||||||
|
|
||||||
|
|
||||||
namespace = os.getenv('PINECONE_NAMESPACE')
|
namespace = os.getenv('PINECONE_NAMESPACE')
|
||||||
|
|
||||||
logger.info("Upserting article to Pinecone")
|
logger.info("Upserting article to Pinecone")
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
|
|||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
import logging
|
import logging
|
||||||
from string import Template
|
from string import Template
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
class S3BatchDownloader:
|
class S3BatchDownloader:
|
||||||
"""Class for batch downloading RSS articles from S3"""
|
"""Class for batch downloading RSS articles from S3"""
|
||||||
@@ -93,15 +94,17 @@ class S3BatchDownloader:
|
|||||||
if self._is_in_date_range(obj['LastModified'], start_ts, end_ts)
|
if self._is_in_date_range(obj['LastModified'], start_ts, end_ts)
|
||||||
]
|
]
|
||||||
self.logger.info(f"Found {len(objects)} objects to process")
|
self.logger.info(f"Found {len(objects)} objects to process")
|
||||||
|
print(f"Found {len(objects)} objects to process")
|
||||||
|
|
||||||
# Download and merge data
|
# Download and merge data
|
||||||
all_data = []
|
all_data = []
|
||||||
with ThreadPoolExecutor(max_workers=self.config['max_workers']) as executor:
|
with ThreadPoolExecutor(max_workers=self.config['max_workers']) as executor, tqdm(total=len(objects), unit="object") as progress_bar:
|
||||||
future_to_obj = {executor.submit(self._download_object, obj): obj for obj in objects}
|
future_to_obj = {executor.submit(self._download_object, obj): obj for obj in objects}
|
||||||
for future in as_completed(future_to_obj):
|
for future in as_completed(future_to_obj):
|
||||||
result = future.result()
|
result = future.result()
|
||||||
if result is not None:
|
if result is not None:
|
||||||
all_data.extend(result if isinstance(result, list) else [result])
|
all_data.extend(result if isinstance(result, list) else [result])
|
||||||
|
progress_bar.update(1)
|
||||||
|
|
||||||
# Save to file
|
# Save to file
|
||||||
self._save_to_file(all_data, output_path, file_format)
|
self._save_to_file(all_data, output_path, file_format)
|
||||||
|
|||||||
6
todo.md
6
todo.md
@@ -1,9 +1,15 @@
|
|||||||
# After Public Launch
|
# After Public Launch
|
||||||
* Monthly Kaggle Dataset Publishing.
|
* Monthly Kaggle Dataset Publishing.
|
||||||
|
|
||||||
|
* Vector Database Initialization at earlier phase. [ Done ]
|
||||||
* Test out Vector Databases at Small Scale.
|
* Test out Vector Databases at Small Scale.
|
||||||
|
* [ ] Testing
|
||||||
|
* [ ] Fix OpenAI Error.
|
||||||
|
* [ ] Let it run for a day
|
||||||
* Test out Vector Databases at Scale.
|
* Test out Vector Databases at Scale.
|
||||||
|
|
||||||
|
|
||||||
|
* Monthly ingestion job
|
||||||
* Protocol for annotating data.
|
* Protocol for annotating data.
|
||||||
* Promotional Material
|
* Promotional Material
|
||||||
* Stuff that is not news
|
* Stuff that is not news
|
||||||
|
|||||||
Reference in New Issue
Block a user