DoD Contracts, Maps and Austria
This commit is contained in:
1
WarContracts/contract_list.json
Normal file
1
WarContracts/contract_list.json
Normal file
File diff suppressed because one or more lines are too long
1
WarContracts/final_results.json
Normal file
1
WarContracts/final_results.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
[{"og_contract":
|
||||||
281
WarContracts/get_contracts.py
Normal file
281
WarContracts/get_contracts.py
Normal file
@@ -0,0 +1,281 @@
|
|||||||
|
import os, sys
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
sys.path.append('/var/www/digisnaxx.ado/scrapers')
|
||||||
|
import dtss
|
||||||
|
dtss.getReady()
|
||||||
|
|
||||||
|
from time import sleep
|
||||||
|
from pprint import pprint as ppr
|
||||||
|
import pytz
|
||||||
|
|
||||||
|
from selenium import webdriver as wd
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
from contracts.models import Contract, Company, Paragraph, OriginalContract
|
||||||
|
|
||||||
|
|
||||||
|
print("\n+++++\n+++++\n+++++\nStarting Scrape\n+++++\n+++++\n+++++\n")

# Offset suffix appended to the date taken from each release URL before it
# is parsed with D3FORMAT.
tz_str = " -0600 UTC"

# strptime layouts; every one expects a trailing " <offset> UTC" suffix.
# DFORMAT parses the 12-hour published-date text.  It must use %I rather
# than %H: strptime only applies the %p (AM/PM) marker when the hour is
# parsed with %I, so "%H ... %p" silently reads "03:30 PM" as 03:30.
DFORMAT = "%b %d, %Y %I:%M %p %z %Z"
D2FORMAT = "%b %d, %Y %z %Z"
D3FORMAT = "%b %d %Y %z %Z"

# SAM.gov origin, and the opportunity search URL to which a contract number
# is appended as the keyword value.
SAM_BASE = "https://sam.gov"
SAMLINK = "https://sam.gov/search/?index=opp&page=1&pageSize=25&sort=-modifiedDate&sfm%5Bstatus%5D%5Bis_active%5D=true&sfm%5Bstatus%5D%5Bis_inactive%5D=true&sfm%5BsimpleSearch%5D%5BkeywordRadio%5D=ALL&sfm%5BsimpleSearch%5D%5BkeywordTags%5D%5B0%5D%5Bvalue%5D="

# Listing page of the contract announcements that are scraped first.
base_site = "https://www.war.gov/News/Contracts/"
|
||||||
|
|
||||||
|
|
||||||
|
# Start Chrome and open the listing's landing page.
br = wd.Chrome()
br.get(base_site)
ps = html.fromstring(br.page_source)
# links = ps.xpath('.//*/p[@class="title"]/a/@href')

# Collect the release links from the paginated listing.  The range is
# currently limited to page 2.
page_links = []
for page_number in range(2, 3):
    br.get(f"{base_site}?Page={page_number}")
    sleep(2)  # pause before reading the page source
    ps = html.fromstring(br.page_source)
    page_links.extend(ps.xpath('.//*/p[@class="title"]/a/@href'))
|
||||||
|
|
||||||
|
|
||||||
|
# for site in range(10,25):
|
||||||
|
# nsite = base_site + "?Page=" + str(site)
|
||||||
|
# br.get(nsite)
|
||||||
|
# sleep(2)
|
||||||
|
# ps = html.fromstring(br.page_source)
|
||||||
|
# page_links = page_links + ps.xpath('.//*/p[@class="title"]/a/@href')
|
||||||
|
|
||||||
|
|
||||||
|
# `data` collects one entry per contract number found in the releases;
# `base_data` is filled later by the SAM.gov pass.
data = []
base_data = []

for link in page_links:
    br.get(link)
    ps = html.fromstring(br.page_source)

    for para in ps.xpath('.//*/p/text()'):
        # The release date is rebuilt from the last three hyphen-separated
        # pieces of the URL, minus its final character.
        date_text = " ".join(link.split("-")[-3:])[:-1] + tz_str
        new_paragraph, created = Paragraph.objects.get_or_create(
            link=link,
            paragraph=para,
            date=datetime.strptime(date_text, D3FORMAT),
        )
        print(created, new_paragraph)

        # Any run of 12+ capitals, digits or hyphens is treated as a
        # contract number; hyphens are dropped before it is stored.
        for raw_number in re.findall(r'[A-Z0-9-]{12,}', para):
            entry = {
                'para': new_paragraph.id,
                'contract': raw_number.replace("-", ""),
                'website': link,
            }
            new_contract, created = OriginalContract.objects.get_or_create(
                para=new_paragraph,
                number=entry['contract'],
            )
            print(created, new_contract)
            data.append(entry)

# Persist the harvested contract numbers.
with open('contract_list.json', 'w') as fp:
    json.dump(data, fp)
|
||||||
|
|
||||||
|
|
||||||
|
# UTC offsets for the timezone abbreviations the published-date text may end
# with.  Daylight abbreviations sit one hour ahead of their standard-time
# counterparts (EDT is -0400, not -0500).
_TZ_OFFSETS = {
    "EST": "-0500", "EDT": "-0400",
    "CST": "-0600", "CDT": "-0500",
    "MST": "-0700", "MDT": "-0600",
    "PST": "-0800", "PDT": "-0700",
}

# Contract attributes copied straight from the notice page.  An attribute is
# left untouched when its element is missing, so one absent element no longer
# aborts the whole run with an IndexError.
# NOTE(review): bluetweet.py in this commit reads `us_dept_sub_tier`, while
# this script writes `us_dept_subtier` -- confirm which name the model uses.
_CORE_FIELDS = {
    "inactive_date": './/*[@id="solicitation"]/div/div/div[3]/div[3]/div[2]/div/h5/text()',
    "inactive_policy": './/*[@id="solicitation"]/div/div/div[3]/div[3]/div[4]/div/h5/text()',
    "resp_date": './/*[@id="solicitation"]/div/div/div[3]/div[4]/div[2]/div/h5/text()',
    "us_dept": './/*[@id="solicitation"]/div/div/div[4]/div/div[1]/div[1]/div[2]/h5/text()',
    "us_dept_subtier": './/*[@id="solicitation"]/div/div/div[4]/div/div[1]/div[2]/div[2]/h5/text()',
    "related_notice_id": './/*[@id="solicitation"]/div/div/div[3]/div[1]/div[4]/div/h5/text()',
    "opp_type": './/*[@id="solicitation"]/div/div/div[3]/div[2]/div[2]/div/h5/text()',
    "orig_set": './/*[@id="class"]/div[2]/div[1]/div[2]/h5/text()',
    "prod_svc_code": './/*[@id="class"]/div[2]/div[2]/div[2]/div/h5/text()',
    "naics_code": './/*[@id="class"]/div[2]/div[2]/div[4]/div/h5/text()',
}

# Contract attributes that fall back to the "naan" placeholder when missing.
_OPTIONAL_FIELDS = {
    "major_cmd": './/*[@id="solicitation"]/div/div/div[4]/div/div[1]/div[3]/div[2]/h5/text()',
    "award_num": './/*[@id="award-details"]/div[2]/div/div[2]/div[2]/div/h5/text()',
    "contract_value": './/*[@id="award-details"]/div[2]/div/div[3]/div[4]/div[3]/div/h5/text()',
    "description": './/*[@id="desc"]/div[2]/div/div/p/text()',
}


def _first_text(tree, xpath, default=None):
    """Return the stripped first text node matched by *xpath*, else *default*."""
    hits = tree.xpath(xpath)
    return hits[0].strip() if hits else default


def _parse_pub_date(text):
    """Parse published-date text ending in a timezone abbreviation.

    Returns an aware datetime, or None when the text does not end in an
    abbreviation listed in _TZ_OFFSETS.  Raises ValueError when the rest of
    the text does not match DFORMAT.
    """
    head, _, abbreviation = text.strip().rpartition(" ")
    offset = _TZ_OFFSETS.get(abbreviation.upper())
    if offset is None:
        return None
    return datetime.strptime(head + " " + offset + " UTC", DFORMAT)


def _scrape_notice(br, d, link):
    """Load one SAM.gov notice and store it as a Contract row.

    *d* is an entry of `data` (keys 'contract', 'para', 'website') and *link*
    is the notice href relative to SAM_BASE.  Returns a JSON-serializable
    summary of the notice, or None when the page shows no line number.
    """
    print("\n++++++++\n", d['website'], "\n++++++++++\n")
    notice_url = SAM_BASE + link
    br.get(notice_url)
    print("\nContract link: ", notice_url)
    sleep(4)  # pause before reading the page source
    ps = html.fromstring(br.page_source)

    og_contract = OriginalContract.objects.get(number=d['contract'], para__id=d['para'])

    # The summary keeps the contract *number* under 'og_contract'.  A model
    # instance there cannot be serialized and made the final json.dump fail.
    record = {
        'og_contract': d['contract'],
        'pub_date_txt': _first_text(
            ps, './/*[@id="solicitation"]/div/div/div[3]/div[4]/div[4]/div/h5/text()', "missed"),
        # A missing title now uses the same placeholder as the other lookup
        # fields instead of an empty list.
        'title': _first_text(ps, './/*/h1[@class="card-title"]/text()', "missed"),
        'notice_id': _first_text(
            ps, './/*[@id="solicitation"]/div/div/div[3]/div[1]/div[2]/div/h5/text()', "missed"),
    }

    contract, created = Contract.objects.get_or_create(
        title=record["title"],
        original_contract_number=og_contract,
        notice_id=record["notice_id"],
        pub_date_txt=record["pub_date_txt"],
        contract_url=br.current_url,
    )
    print('CONTRACT SUCCESS', created, contract)

    for field, xpath in _CORE_FIELDS.items():
        value = _first_text(ps, xpath)
        if value is not None:
            setattr(contract, field, value)
    contract.us_office = _first_text(
        ps, './/*[@id="solicitation"]/div/div/div[4]/div/div[2]/div/div[2]/h5/text()', "naan")
    contract.save()

    line_num = _first_text(
        ps, './/*[@id="solicitation"]/div/div/div[3]/div[2]/div[4]/div/h5/text()')
    if line_num is None:
        # NOTE(review): kept from the original `except: continue` -- a notice
        # without a line number gets no award, company or date processing.
        # Confirm that this is intended.
        return None
    contract.line_num = line_num

    for field, xpath in _OPTIONAL_FIELDS.items():
        setattr(contract, field, _first_text(ps, xpath, "naan"))
    unq_entity_id = _first_text(
        ps, './/*[@id="award-details"]/div[2]/div/div[3]/div[3]/div/h5/text()')
    contract.unq_entity_id = "naan" if unq_entity_id is None else unq_entity_id
    contract.save()

    record['awarded_name'] = _first_text(
        ps, './/*[@id="award-details"]/div[2]/div/div[4]/div[2]/div/h5/text()', "naan")
    record['award_date'] = _first_text(
        ps, './/*[@id="award-details"]/div[2]/div/div[1]/div[2]/div/h5/text()', "naan")
    record['awarded_addr'] = _first_text(
        ps, './/*[@id="award-details"]/div[2]/div/div[2]/div[3]/div[2]/h5/text()', "naan")
    if unq_entity_id is not None:
        record['unq_entity_id'] = unq_entity_id
    print("\nappended info: ", record['title'])

    # A company is only linked when the page shows a unique entity id, so no
    # placeholder company is ever created.
    if unq_entity_id is None:
        print("\nCompany error: ", "no unique entity id on the notice page")
    else:
        try:
            new_company, created = Company.objects.get_or_create(
                name=record["awarded_name"],
                address_complete=record["awarded_addr"],
                unq_entity_id=unq_entity_id,
            )
            print("COMPANY CREATED: ", created, new_company)
            contract.company = new_company
            contract.save()
        except Exception as e:
            print("\nCompany error: ", e)

    try:
        pub_date = _parse_pub_date(contract.pub_date_txt)
        if pub_date is not None:
            contract.pub_date = pub_date
            contract.save()
    except Exception as e:
        print("Date Error: ", e)

    try:
        # The award date text carries no time or zone; -0500 is assumed.
        contract.award_date = datetime.strptime(record["award_date"] + " -0500 UTC", D2FORMAT)
        contract.save()
    except Exception as e:
        print("Award Date Error: ", e)

    return record


try:
    for d in data:
        website = SAMLINK + d['contract']
        print("\nGetting Website: ", website)
        br.get(website)
        sleep(2)
        results = html.fromstring(br.page_source)
        for link in results.xpath('.//*/h3/a[@class="usa-link ng-star-inserted"]/@href'):
            record = _scrape_notice(br, d, link)
            if record is not None:
                base_data.append(record)
finally:
    # The browser is not needed past this point; shut it down even when the
    # scrape fails.
    br.quit()

with open('final_results.json', 'w') as fp:
    json.dump(base_data, fp)
|
||||||
@@ -75,15 +75,14 @@ for hE in houseEvents:
|
|||||||
event_type = 'Gv',
|
event_type = 'Gv',
|
||||||
show_title = title,
|
show_title = title,
|
||||||
show_link = link,
|
show_link = link,
|
||||||
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
|
show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
|
||||||
show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT).date(),
|
|
||||||
# more_details = details['location'],
|
# more_details = details['location'],
|
||||||
venue = venue,
|
venue = venue,
|
||||||
scraper = scraper
|
scraper = scraper
|
||||||
)
|
)
|
||||||
|
|
||||||
if type(new_event) is tuple:
|
if type(new_event) is tuple:
|
||||||
print("STS: ", new_event)
|
print("HOUSE: ", new_event)
|
||||||
add_calendar(new_event[0], 'msp')
|
add_calendar(new_event[0], 'msp')
|
||||||
else:
|
else:
|
||||||
add_calendar(new_event, 'msp')
|
add_calendar(new_event, 'msp')
|
||||||
@@ -112,8 +111,7 @@ for sE in senateEvents:
|
|||||||
event_type = 'Gv',
|
event_type = 'Gv',
|
||||||
show_title = title,
|
show_title = title,
|
||||||
show_link = link,
|
show_link = link,
|
||||||
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
|
show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
|
||||||
show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT).date(),
|
|
||||||
# more_details = details['location'],
|
# more_details = details['location'],
|
||||||
venue = venue,
|
venue = venue,
|
||||||
scraper = scraper
|
scraper = scraper
|
||||||
@@ -146,14 +144,13 @@ for cE in commEvents:
|
|||||||
event_type = 'Gv',
|
event_type = 'Gv',
|
||||||
show_title = title,
|
show_title = title,
|
||||||
show_link = link,
|
show_link = link,
|
||||||
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
|
show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
|
||||||
show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT).date(),
|
|
||||||
# more_details = details['location'],
|
# more_details = details['location'],
|
||||||
venue = venue,
|
venue = venue,
|
||||||
scraper = scraper
|
scraper = scraper
|
||||||
)
|
)
|
||||||
if type(new_event) is tuple:
|
if type(new_event) is tuple:
|
||||||
print("STS: ", new_event)
|
print("COMMS: ", new_event)
|
||||||
add_calendar(new_event[0], 'msp')
|
add_calendar(new_event[0], 'msp')
|
||||||
else:
|
else:
|
||||||
add_calendar(new_event, 'msp')
|
add_calendar(new_event, 'msp')
|
||||||
|
|||||||
@@ -66,7 +66,6 @@ def getEvents(br):
|
|||||||
show_title = title,
|
show_title = title,
|
||||||
show_link = link,
|
show_link = link,
|
||||||
show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
|
show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
|
||||||
show_day = datetime.strptime(dateTime, DATETIME_FORMAT),
|
|
||||||
venue = venue,
|
venue = venue,
|
||||||
scraper = scraper
|
scraper = scraper
|
||||||
)
|
)
|
||||||
|
|||||||
72
Working/venues/getMaps.py
Normal file
72
Working/venues/getMaps.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
import os, sys
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from dateutil import relativedelta
|
||||||
|
from pprint import pprint as ppr
|
||||||
|
|
||||||
|
sys.path.append('/var/www/digisnaxx.ado/scrapers')
|
||||||
|
import dtss
|
||||||
|
dtss.getReady()
|
||||||
|
|
||||||
|
from time import sleep
|
||||||
|
from pprint import pprint as ppr
|
||||||
|
import pytz
|
||||||
|
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
|
||||||
|
from events.models import Organization, Scraper, Calendar, Event
|
||||||
|
import events.digitools as digitools
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
def getSite(br, website, org):
    """Open *website* and store the first map link found on *org*.

    The first href matched under the "luibr" result block is prefixed with
    the Google origin, written to ``org.barrio`` and saved.  Nothing is
    stored when no link is matched.
    """
    tree = digitools.getSource(br, website)
    hrefs = tree.xpath('.//*/div[@class="luibr"]/div/div/a/@href')
    ppr(hrefs)
    if hrefs:
        org.barrio = "https://www.google.com" + hrefs[0]
        org.save()
    # NOTE(review): indentation was lost in this view; the pause is assumed
    # to follow every call, not only calls that found a link.
    sleep(5)
|
||||||
|
|
||||||
|
|
||||||
|
# The run environment is taken from the first command-line argument and
# handed to digitools.getBrowser to obtain the browser.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # No browser has been created on this path, so there is nothing to
    # close.  The earlier `br.close()` here raised NameError before the
    # script could exit.
    print("No run_env")
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
orgs = Organization.objects.all()

# First pass (disabled): search for each organization by name and city and
# store the first map link through getSite.
# for org in orgs:
#     try:
#         if len(org.city) > 0:
#             msg = org.name.split(" ") + org.city.split(" ")
#             site = "https://www.google.com/search?q=google+maps+" + "+".join(msg)
#             print(site)
#             getSite(br, site, org)
#     except Exception as e:
#         ppr(org)
#         print(e)

# "@<latitude>,<longitude>" as it appears in the URL the browser ends up on.
COORDS_RE = re.compile(r'@(-?\d+\.\d+),(-?\d+\.\d+)')

# Second pass: open each stored map link and read the coordinates from the
# resulting URL.
# NOTE(review): the [5:] slice skips the first five organizations and the
# queryset has no explicit ordering -- confirm this is intended.
try:
    for org in orgs[5:]:
        try:
            br.get(org.barrio)
            org.gmap_link = org.barrio
            sleep(5)  # pause before reading the current URL
            match = COORDS_RE.search(br.current_url)
            if match is None:
                ppr(org)
                print("no coordinates found in", br.current_url)
                continue
            latitude, longitude = match.groups()
            print(latitude, longitude)
            org.latitude = latitude
            org.longitude = longitude
            org.save()
        except Exception as e:
            ppr(org)
            print(e)
finally:
    br.close()
|
||||||
@@ -44,7 +44,7 @@ def get_events(ps, event_type):
|
|||||||
try:
|
try:
|
||||||
event['support'] = c.xpath('.//p[@class="fs-12 supporting-talent"]/text()')[0]
|
event['support'] = c.xpath('.//p[@class="fs-12 supporting-talent"]/text()')[0]
|
||||||
except:
|
except:
|
||||||
pass
|
continue
|
||||||
event['venue'] = c.xpath('.//p[@class="fs-12 venue"]/text()')[0]
|
event['venue'] = c.xpath('.//p[@class="fs-12 venue"]/text()')[0]
|
||||||
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
|
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
|
||||||
event['link'] = venue.website
|
event['link'] = venue.website
|
||||||
|
|||||||
@@ -49,31 +49,33 @@ def getLinks(br, url, links):
|
|||||||
div = br.find_element(By.XPATH, ".//*/div[@class='feed-pagination flexify']/a")
|
div = br.find_element(By.XPATH, ".//*/div[@class='feed-pagination flexify']/a")
|
||||||
div.click()
|
div.click()
|
||||||
sleep(2)
|
sleep(2)
|
||||||
br.execute_script("window.scrollTo(0, window.scrollY + 1100)")
|
br.execute_script("window.scrollTo(0, window.scrollY + 1375)")
|
||||||
sleep(2)
|
sleep(2)
|
||||||
except:
|
except:
|
||||||
x = 0
|
x = 0
|
||||||
ps = html.fromstring(br.page_source)
|
ps = html.fromstring(br.page_source)
|
||||||
newLinks = []
|
newLinks = []
|
||||||
# newlinks = ps.xpath('.//*/div[@class="e-con-inner"]/*/a/@href')
|
# newlinks = ps.xpath('.//*/div[@class="e-con-inner"]/*/a/@href')
|
||||||
events = ps.xpath('.//*/div[@class="e-con-inner"]')
|
events = ps.xpath('.//*/div[@class="ts-preview"]')
|
||||||
for event in events:
|
for event in events:
|
||||||
ev = {}
|
ev = {}
|
||||||
try:
|
try:
|
||||||
ev['link'] = event.xpath('.//*/a/@href')[0]
|
ev['link'] = event.xpath('.//*/h3/a/@href')[0]
|
||||||
ev['title'] = event.xpath('.//*/h3/a/text()')[0]
|
ev['title'] = event.xpath('.//*/h3/a/text()')[0]
|
||||||
ev['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0].replace('\n', '').replace('\t', '')
|
ev['venue'] = event.xpath('.//*/ul/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/a[@class="ts-action-con"]/text()')[-1:][0].replace('\n', '').replace('\t', '')
|
||||||
# e['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0]
|
ev['venueLink'] = event.xpath('.//*/ul/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/a[@class="ts-action-con"]/@href')[-1:][0]
|
||||||
ev['venueLink'] = event.xpath('.//*/ul/li/a/@href')[1]
|
# ev['venueLink'] = event.xpath('.//*/ul/li/a/@href')[1]
|
||||||
label= event.xpath('.//*/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/div/text()')
|
label= event.xpath('.//*/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/div/text()')
|
||||||
ev['label'] = ''.join([x.replace('\t', '').replace('\n', '') for x in label]).strip()
|
ev['label'] = ''.join([x.replace('\t', '').replace('\n', '') for x in label]).strip()
|
||||||
newLinks.append(ev)
|
newLinks.append(ev)
|
||||||
|
# ppr(ev)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Error: ", ev, e)
|
print("Error: ", ev, e)
|
||||||
|
|
||||||
links = links + newLinks
|
links = links + newLinks
|
||||||
return links
|
return links
|
||||||
|
|
||||||
|
|
||||||
if len(sys.argv) >= 2:
|
if len(sys.argv) >= 2:
|
||||||
arg1 = sys.argv[1]
|
arg1 = sys.argv[1]
|
||||||
br = digitools.getBrowser(arg1)
|
br = digitools.getBrowser(arg1)
|
||||||
@@ -89,11 +91,15 @@ for url in urls:
|
|||||||
allLinks = getLinks(br, url, allLinks)
|
allLinks = getLinks(br, url, allLinks)
|
||||||
|
|
||||||
totalLinks = list({v['title']:v for v in allLinks}.values())
|
totalLinks = list({v['title']:v for v in allLinks}.values())
|
||||||
|
ppr(totalLinks)
|
||||||
ppr(len(totalLinks))
|
ppr(len(totalLinks))
|
||||||
# sortedlinks = allLinks.sort()
|
# sortedlinks = allLinks.sort()
|
||||||
# ppr(sortedlinks)
|
# ppr(sortedlinks)
|
||||||
|
|
||||||
for event in totalLinks:
|
# quit()
|
||||||
|
|
||||||
|
|
||||||
|
for event in totalLinks[175:250]:
|
||||||
br.get(event['link'])
|
br.get(event['link'])
|
||||||
sleep(1)
|
sleep(1)
|
||||||
ps = html.fromstring(br.page_source)
|
ps = html.fromstring(br.page_source)
|
||||||
@@ -156,6 +162,10 @@ for d in new_data:
|
|||||||
|
|
||||||
cal = Calendar.objects.get(shortcode='mde')
|
cal = Calendar.objects.get(shortcode='mde')
|
||||||
|
|
||||||
|
print("run paisa")
|
||||||
|
|
||||||
|
ppr(paisa)
|
||||||
|
|
||||||
for d in paisa:
|
for d in paisa:
|
||||||
try:
|
try:
|
||||||
nvenue, created = Organization.objects.get_or_create(
|
nvenue, created = Organization.objects.get_or_create(
|
||||||
@@ -170,17 +180,20 @@ for d in paisa:
|
|||||||
nvenue.address_complete = d['address']
|
nvenue.address_complete = d['address']
|
||||||
nvenue.save()
|
nvenue.save()
|
||||||
|
|
||||||
|
try:
|
||||||
new_event, created = Event.objects.update_or_create(
|
new_event, created = Event.objects.update_or_create(
|
||||||
event_type = d['category'],
|
event_type = d['category'],
|
||||||
show_title = d['title'],
|
show_title = d['title'],
|
||||||
show_link = d['link'],
|
show_link = d['link'],
|
||||||
show_date = d['dateStamp'],
|
show_date = d['dateStamp'],
|
||||||
show_day = d['dateStamp'],
|
|
||||||
scraper = scraper,
|
scraper = scraper,
|
||||||
venue = nvenue
|
venue = nvenue
|
||||||
)
|
)
|
||||||
new_event.calendar.add(cal)
|
new_event.calendar.add(cal)
|
||||||
new_event.save()
|
new_event.save()
|
||||||
|
print(new_event, created, new_event.scraper)
|
||||||
# print("Success:", new_event)
|
# print("Success:", new_event)
|
||||||
|
except Execption as e:
|
||||||
|
print(e)
|
||||||
|
|
||||||
digitools.updateScraper(scraper, item_count_start)
|
digitools.updateScraper(scraper, item_count_start)
|
||||||
@@ -81,11 +81,11 @@ for event in events:
|
|||||||
show_title = event["show_title"],
|
show_title = event["show_title"],
|
||||||
show_link = event["link"],
|
show_link = event["link"],
|
||||||
show_date = event["date_time"],
|
show_date = event["date_time"],
|
||||||
show_day = event["date_time"],
|
|
||||||
guests = " ".join(event["subtitle"]),
|
guests = " ".join(event["subtitle"]),
|
||||||
venue = venue
|
venue = venue
|
||||||
)
|
)
|
||||||
digitools.add_calendar(new_event, 'msp')
|
digitools.add_calendar(new_event, 'msp')
|
||||||
|
ppr(new_event)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("oops ", e, "\n\n", "Scraper:", scraper)
|
print("oops ", e, "\n\n", "Scraper:", scraper)
|
||||||
|
|
||||||
|
|||||||
@@ -55,7 +55,6 @@ def get_events(ps, event_type):
|
|||||||
show_title = event['title'],
|
show_title = event['title'],
|
||||||
show_link = event['link'],
|
show_link = event['link'],
|
||||||
show_date = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
|
show_date = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
|
||||||
show_day = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
|
|
||||||
more_details = event["details"],
|
more_details = event["details"],
|
||||||
venue = venue
|
venue = venue
|
||||||
)
|
)
|
||||||
@@ -71,7 +70,6 @@ def get_events(ps, event_type):
|
|||||||
show_title = event['title'],
|
show_title = event['title'],
|
||||||
show_link = event['link'],
|
show_link = event['link'],
|
||||||
show_date = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
|
show_date = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
|
||||||
show_day = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
|
|
||||||
more_details = event["details"],
|
more_details = event["details"],
|
||||||
venue = venue
|
venue = venue
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ scraper,item_count_start, virtcal = digitools.getScraper(venue, venue.website, '
|
|||||||
|
|
||||||
tz_str = " -0600 UTC"
|
tz_str = " -0600 UTC"
|
||||||
DATETIME_FORMAT = '%a %B %d @ %I:%M %p %Y %z %Z'
|
DATETIME_FORMAT = '%a %B %d @ %I:%M %p %Y %z %Z'
|
||||||
DATETIME_FORMAT_2 = '%a %B %d, %Y @ %I:%M %p %z %Z'
|
DATETIME_FORMAT_2 = '%a %B %d @ %I:%M %p %Y %z %Z'
|
||||||
calendar_url = 'https://noboolpresents.com/venues/uptown-vfw/'
|
calendar_url = 'https://noboolpresents.com/venues/uptown-vfw/'
|
||||||
current_year = str(datetime.now().year)
|
current_year = str(datetime.now().year)
|
||||||
|
|
||||||
@@ -50,32 +50,33 @@ def getEvents(br):
|
|||||||
title = event.xpath('.//*/h2[@class="alt-font"]/a/text()')[0].replace("\n", "").replace("\t", "")
|
title = event.xpath('.//*/h2[@class="alt-font"]/a/text()')[0].replace("\n", "").replace("\t", "")
|
||||||
link = event.xpath('.//*/h2[@class="alt-font"]/a/@href')[0]
|
link = event.xpath('.//*/h2[@class="alt-font"]/a/@href')[0]
|
||||||
try:
|
try:
|
||||||
|
print("First Try", dateTime)
|
||||||
new_event, created = Event.objects.update_or_create(
|
new_event, created = Event.objects.update_or_create(
|
||||||
scraper = scraper,
|
scraper = scraper,
|
||||||
event_type = 'Mu',
|
event_type = 'Mu',
|
||||||
show_title = title,
|
show_title = title,
|
||||||
show_link = link,
|
show_link = link,
|
||||||
show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
|
show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
|
||||||
show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
|
|
||||||
venue = venue
|
venue = venue
|
||||||
)
|
)
|
||||||
digitools.add_calendar(new_event, 'msp')
|
digitools.add_calendar(new_event, 'msp')
|
||||||
scraper.items+=1
|
scraper.items+=1
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
try:
|
try:
|
||||||
|
print("New Try", dateTime)
|
||||||
new_event, created = Event.objects.update_or_create(
|
new_event, created = Event.objects.update_or_create(
|
||||||
scraper = scraper,
|
scraper = scraper,
|
||||||
event_type = 'Mu',
|
event_type = 'Mu',
|
||||||
show_title = title,
|
show_title = title,
|
||||||
show_link = link,
|
show_link = link,
|
||||||
show_date = datetime.strptime(dateTime[:-4].strip() + tz_str, DATETIME_FORMAT_2),
|
show_date = datetime.strptime(dateTime.strip() + tz_str, DATETIME_FORMAT_2),
|
||||||
show_day = datetime.strptime(dateTime[:-4].strip() + tz_str, DATETIME_FORMAT_2),
|
|
||||||
venue = venue
|
venue = venue
|
||||||
)
|
)
|
||||||
digitools.add_calendar(new_event, 'msp')
|
digitools.add_calendar(new_event, 'msp')
|
||||||
scraper.items+=1
|
scraper.items+=1
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("oops", e, dateTime[:-4])
|
print("oops", e)
|
||||||
|
ppr(event)
|
||||||
|
|
||||||
getEvents(br)
|
getEvents(br)
|
||||||
br.find_element(By.XPATH, './/*/li[@class="tribe-events-c-nav__list-item tribe-events-c-nav__list-item--next"]/a').click()
|
br.find_element(By.XPATH, './/*/li[@class="tribe-events-c-nav__list-item tribe-events-c-nav__list-item--next"]/a').click()
|
||||||
|
|||||||
68
Working/workshop/smedia/bluetweet.py
Normal file
68
Working/workshop/smedia/bluetweet.py
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
import os, sys
|
||||||
|
from datetime import datetime
|
||||||
|
from dateutil import relativedelta
|
||||||
|
|
||||||
|
from atproto import Client, client_utils
|
||||||
|
|
||||||
|
sys.path.append('/var/www/digisnaxx.ado/scrapers')
|
||||||
|
import dtss
|
||||||
|
dtss.getReady()
|
||||||
|
|
||||||
|
from time import sleep
|
||||||
|
from pprint import pprint as ppr
|
||||||
|
import pytz
|
||||||
|
|
||||||
|
|
||||||
|
from contracts.models import Contract
|
||||||
|
from events.digitools import getBrowser, createURL, createBasicEvent, getSource
|
||||||
|
|
||||||
|
tz = pytz.timezone("US/Central")

# Credentials come from the environment so that no secret lives in the
# repository.  BLUESKY_APP_PASSWORD is mandatory; a missing value stops the
# script with a KeyError before any network call is made.
# NOTE(review): the passwords previously committed in this file should be
# revoked, since they remain in the repository history.
USERNAME = os.environ.get("BLUESKY_USERNAME", "digisnaxx.bsky.social")
PASSWORD = os.environ["BLUESKY_APP_PASSWORD"]

client = Client()
client.login(USERNAME, PASSWORD)
|
||||||
|
|
||||||
|
def _build_post_text(c):
    """Compose the post body for contract *c*, or return None to skip it.

    A contract value of exactly four characters is the "naan" placeholder
    written by the scraper, so it is left out of the post.  Shorter values
    cause the contract to be skipped.  The company line appears only when a
    company is linked.
    """
    value_length = len(c.contract_value)
    if value_length < 4:
        return None
    parts = ["{0} / {1}".format(c.us_dept, c.us_dept_sub_tier)]
    if c.company is not None:
        parts.append("Company: {0}".format(c.company.name))
    if value_length > 4:
        parts.append("{0}".format(c.contract_value))
    parts.append("NAICS Code: {0}".format(c.naics_code))
    return "\n\n" + "\n\n".join(parts)


# Newest contracts first, skipping the first 35.
# NOTE(review): the 35 looks like a resume point from an earlier run --
# confirm before reusing.
contracts = Contract.objects.all().order_by('-pub_date')[35:]

for c in contracts:
    URL = "https://www.digisnaxx.com/contract/" + str(c.id)
    try:
        POST_TEXT = _build_post_text(c)
        if POST_TEXT is None:
            continue

        # Two hashtags, the body, then the link back to the contract page.
        notice_tag = c.notice_id[:16]
        text = (
            client_utils.TextBuilder()
            .tag("#DoDContracts ", "DoDContracts")
            .tag("#" + notice_tag, notice_tag)
            .text(POST_TEXT + "\n\n")
            .link(URL, URL)
        )
        print(POST_TEXT)
        ppr(c)
        client.send_post(text=text)
        sleep(25)  # pause between posts

    except Exception as e:
        # Best effort: report the failing contract and move on to the next.
        print(c.notice_id)
        print("Error: ", e)
|
||||||
|
|
||||||
|
|
||||||
|
# feed = client.get_author_feed(USERNAME, limit = 100)
|
||||||
Reference in New Issue
Block a user