DoD Contracts, Maps and Austria

This commit is contained in:
2026-02-19 22:49:47 -05:00
parent 3c4a41ae2c
commit 3eda7647e9
13 changed files with 469 additions and 38 deletions

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
[{"og_contract":

View File

@@ -0,0 +1,281 @@
import os, sys
import json
import re
from datetime import datetime
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from selenium import webdriver as wd
from lxml import html
from contracts.models import Contract, Company, Paragraph, OriginalContract
print("\n+++++\n+++++\n+++++\nStarting Scrape\n+++++\n+++++\n+++++\n")
tz_str = " -0600 UTC"
DFORMAT = "%b %d, %Y %H:%M %p %z %Z"
D2FORMAT = "%b %d, %Y %z %Z"
D3FORMAT = "%b %d %Y %z %Z"
SAM_BASE = "https://sam.gov"
SAMLINK = "https://sam.gov/search/?index=opp&page=1&pageSize=25&sort=-modifiedDate&sfm%5Bstatus%5D%5Bis_active%5D=true&sfm%5Bstatus%5D%5Bis_inactive%5D=true&sfm%5BsimpleSearch%5D%5BkeywordRadio%5D=ALL&sfm%5BsimpleSearch%5D%5BkeywordTags%5D%5B0%5D%5Bvalue%5D="
base_site = "https://www.war.gov/News/Contracts/"
br = wd.Chrome()
br.get(base_site)
# Collect announcement links from the paginated DoD contracts listing.
ps = html.fromstring(br.page_source)
page_links = []
# Currently only page 2 is harvested (range(2, 3)); widen the range to
# backfill more pages.
for page_num in range(2, 3):
    paged_url = "{0}?Page={1}".format(base_site, page_num)
    br.get(paged_url)
    sleep(2)
    ps = html.fromstring(br.page_source)
    page_links.extend(ps.xpath('.//*/p[@class="title"]/a/@href'))
# Raw contract-number records (also dumped to contract_list.json).
data = []
# Per-contract SAM.gov detail dicts, filled later and dumped at the end.
base_data = []
for link in page_links:
    br.get(link)
    ps = html.fromstring(br.page_source)
    # Every <p> text node of the announcement page.
    paras = ps.xpath('.//*/p/text()')
    # print(len(paras), link)
    for para in paras:
        # The announcement URL ends in .../Month-Day-Year/ — the last three
        # dash-separated pieces (minus the trailing slash) give the publish
        # date, parsed with the Central-time suffix appended.
        # NOTE(review): assumes every link ends with "/" — confirm.
        nlink = " ".join(link.split("-")[-3:])[:-1] + tz_str
        new_paragraph, created = Paragraph.objects.get_or_create(
            link = link,
            paragraph=para,
            date = datetime.strptime(nlink, D3FORMAT),
        )
        print(created, new_paragraph)
        # Contract numbers look like long (12+) runs of capitals/digits/dashes.
        contracts = re.findall(r'[A-Z0-9-]{12,}', para)
        if len(contracts) > 0:
            # print(contracts)
            for contract in contracts:
                i = {}
                i['para'] = new_paragraph.id
                # Dashes are stripped so the number matches SAM.gov's format.
                i['contract'] = contract.replace("-", "")
                i['website'] = link
                new_contract, created = OriginalContract.objects.get_or_create(
                    para = new_paragraph,
                    number= i['contract'],
                )
                print(created, new_contract)
                data.append(i)
# Persist the raw list so a later run can start from the SAM.gov stage.
with open('contract_list.json', 'w') as fp:
    json.dump(data, fp)
# For every scraped contract number, search SAM.gov, open each matching
# opportunity, and persist its detail fields on a Contract row.
# NOTE(review): the positional XPaths below are extremely brittle — any
# SAM.gov layout change silently shifts fields into the bare excepts.
for d in data:
    website = SAMLINK + d['contract']
    print("\nGetting Website: ", website)
    br.get(website)
    sleep(2)
    links = []
    ps = html.fromstring(br.page_source)
    # Result links on the SAM.gov search page.
    links = ps.xpath('.//*/h3/a[@class="usa-link ng-star-inserted"]/@href')
    for link in links:
        print("\n++++++++\n", d['website'], "\n++++++++++\n")
        br.get(SAM_BASE + link)
        print("\nContract link: ", SAM_BASE + link)
        sleep(4)
        ps = html.fromstring(br.page_source)
        i = {}
        # The OriginalContract row created during the announcement scrape.
        i['og_contract'] = OriginalContract.objects.get(number=d['contract'], para__id=d['para'])
        # "missed" is the sentinel used when a field's XPath finds nothing.
        try:
            i['pub_date_txt'] = ps.xpath('.//*[@id="solicitation"]/div/div/div[3]/div[4]/div[4]/div/h5/text()')[0].strip()
        except:
            i['pub_date_txt'] = "missed"
        try:
            i['title'] = ps.xpath('.//*/h1[@class="card-title"]/text()')[0].strip()
        except:
            # NOTE(review): on failure this stores the raw (likely empty) list
            # returned by xpath(), not a string — confirm intended.
            i['title'] = ps.xpath('.//*/h1[@class="card-title"]/text()')
        try:
            i['notice_id'] = ps.xpath('.//*[@id="solicitation"]/div/div/div[3]/div[1]/div[2]/div/h5/text()')[0].strip()
        except:
            i['notice_id'] = "missed"
        contract, created = Contract.objects.get_or_create(
            title=i["title"],
            original_contract_number = i["og_contract"],
            notice_id=i["notice_id"],
            pub_date_txt = i["pub_date_txt"],
            contract_url = br.current_url
        )
        print('CONTRACT SUCCESS', created, contract)
        # Fields below are unguarded: an IndexError on any of them raises out
        # of this iteration before contract.save() — NOTE(review): confirm
        # that losing the whole record in that case is acceptable.
        contract.inactive_date = ps.xpath('.//*[@id="solicitation"]/div/div/div[3]/div[3]/div[2]/div/h5/text()')[0].strip()
        contract.inactive_policy = ps.xpath('.//*[@id="solicitation"]/div/div/div[3]/div[3]/div[4]/div/h5/text()')[0].strip()
        # contract.pub_date_txt = ps.xpath('.//*[@id="solicitation"]/div/div/div[3]/div[4]/div[4]/div/h5/text()')[0].strip()
        contract.resp_date = ps.xpath('.//*[@id="solicitation"]/div/div/div[3]/div[4]/div[2]/div/h5/text()')[0].strip()
        contract.us_dept = ps.xpath('.//*[@id="solicitation"]/div/div/div[4]/div/div[1]/div[1]/div[2]/h5/text()')[0].strip()
        contract.us_dept_subtier = ps.xpath('.//*[@id="solicitation"]/div/div/div[4]/div/div[1]/div[2]/div[2]/h5/text()')[0].strip()
        contract.related_notice_id = ps.xpath('.//*[@id="solicitation"]/div/div/div[3]/div[1]/div[4]/div/h5/text()')[0].strip()
        contract.opp_type = ps.xpath('.//*[@id="solicitation"]/div/div/div[3]/div[2]/div[2]/div/h5/text()')[0].strip()
        contract.orig_set = ps.xpath('.//*[@id="class"]/div[2]/div[1]/div[2]/h5/text()')[0].strip()
        contract.prod_svc_code = ps.xpath('.//*[@id="class"]/div[2]/div[2]/div[2]/div/h5/text()')[0].strip()
        contract.naics_code = ps.xpath('.//*[@id="class"]/div[2]/div[2]/div[4]/div/h5/text()')[0].strip()
        contract.save()
        # Optional fields: "naan" is the placeholder when the element is
        # missing.  The bare excepts also swallow DB errors — NOTE(review).
        try:
            contract.us_office = ps.xpath('.//*[@id="solicitation"]/div/div/div[4]/div/div[2]/div/div[2]/h5/text()')[0].strip()
            contract.save()
        except:
            contract.us_office = "naan"
            contract.save()
        try:
            contract.line_num = ps.xpath('.//*[@id="solicitation"]/div/div/div[3]/div[2]/div[4]/div/h5/text()')[0].strip()
            contract.save()
        except:
            # Skips ALL remaining processing for this contract link.
            continue
        try:
            contract.major_cmd = ps.xpath('.//*[@id="solicitation"]/div/div/div[4]/div/div[1]/div[3]/div[2]/h5/text()')[0].strip()
            contract.save()
        except:
            contract.major_cmd = "naan"
            contract.save()
        # Award-section fields (absent on unawarded opportunities).
        try:
            i['awarded_name'] = ps.xpath('.//*[@id="award-details"]/div[2]/div/div[4]/div[2]/div/h5/text()')[0].strip()
        except:
            i['awarded_name'] = "naan"
        try:
            i['award_date'] = ps.xpath('.//*[@id="award-details"]/div[2]/div/div[1]/div[2]/div/h5/text()')[0].strip()
            contract.save()
        except:
            i['award_date'] = "naan"
            contract.save()
        try:
            contract.award_num = ps.xpath('.//*[@id="award-details"]/div[2]/div/div[2]/div[2]/div/h5/text()')[0].strip()
            contract.save()
        except:
            contract.award_num = "naan"
            contract.save()
        try:
            i['unq_entity_id'] = ps.xpath('.//*[@id="award-details"]/div[2]/div/div[3]/div[3]/div/h5/text()')[0].strip()
            contract.unq_entity_id = ps.xpath('.//*[@id="award-details"]/div[2]/div/div[3]/div[3]/div/h5/text()')[0].strip()
            contract.save()
        except:
            contract.unq_entity_id = "naan"
            contract.save()
        try:
            i['awarded_addr'] = ps.xpath('.//*[@id="award-details"]/div[2]/div/div[2]/div[3]/div[2]/h5/text()')[0].strip()
        except:
            i['awarded_addr'] = "naan"
        try:
            contract.contract_value = ps.xpath('.//*[@id="award-details"]/div[2]/div/div[3]/div[4]/div[3]/div/h5/text()')[0].strip()
            contract.save()
        except:
            contract.contract_value = "naan"
            contract.save()
        try:
            contract.description = ps.xpath('.//*[@id="desc"]/div[2]/div/div/p/text()')[0].strip()
            contract.save()
        except:
            contract.description = "naan"
            contract.save()
        base_data.append(i)
        print("\nappended info: ", i['title'])
        # Link (or create) the awarded company.  All three fields may hold the
        # "naan" placeholder when the award section was missing.
        try:
            new_company, created = Company.objects.get_or_create(
                name = i["awarded_name"],
                address_complete=i["awarded_addr"],
                unq_entity_id = i["unq_entity_id"],
            )
            print("COMPANY CREATED: ", created, new_company)
            contract.company = new_company
            contract.save()
        except Exception as e:
            print("\nCompany error: ", e)
try:
if contract.pub_date_txt.split(" ")[-1:][0].strip() == "EST":
print("TZ: ", contract.pub_date_txt.split(" ")[-1:][0].strip())
ndate = " ".join(contract.pub_date_txt.split(" ")[:-1]) + " -0500 UTC"
contract.pub_date = datetime.strptime(ndate, DFORMAT)
contract.save()
elif contract.pub_date_txt.split(" ")[-1:][0].strip() == "EDT":
ndate = " ".join(contract.pub_date_txt.split(" ")[:-1]) + " -0500 UTC"
contract.pub_date = datetime.strptime(ndate, DFORMAT)
contract.save()
elif contract.pub_date_txt.split(" ")[-1:][0].strip() == "CST":
ndate = " ".join(contract.pub_date_txt.split(" ")[:-1]) + " -0600 UTC"
contract.pub_date = datetime.strptime(ndate, DFORMAT)
contract.save()
elif contract.pub_date_txt.split(" ")[-1:][0].strip() == "CDT":
ndate = " ".join(contract.pub_date_txt.split(" ")[:-1]) + " -0600 UTC"
contract.pub_date = datetime.strptime(ndate, DFORMAT)
contract.save()
elif contract.pub_date_txt.split(" ")[-1:][0].strip() == "MST":
ndate = " ".join(contract.pub_date_txt.split(" ")[:-1]) + " -0700 UTC"
contract.pub_date = datetime.strptime(ndate, DFORMAT)
contract.save()
elif contract.pub_date_txt.split(" ")[-1:][0].strip() == "PST":
ndate = " ".join(contract.pub_date_txt.split(" ")[:-1]) + " -0800 UTC"
contract.pub_date = datetime.strptime(ndate, DFORMAT)
contract.save()
except Exception as e:
print("Date Error: ", e)
pass
# try:
#     contract.major_cmd = i["major_cmd"],
#     contract.save()
# except Exception as e:
#     print("Major Cmd Error: ", e)
#     pass
try:
    # Award dates carry no time or zone on the page; US Eastern standard
    # time (-0500) is assumed.
    time = " -0500 UTC"
    contract.award_date = datetime.strptime(i["award_date"] + time, D2FORMAT)
    contract.save()
except Exception as e:
    # The "naan" placeholder lands here; award_date stays unset.
    print("Award Date Error: ", e)
    pass
# try:
#     contract.award_num = i["award_num"]
#     contract.save()
# except Exception as e:
#     print("Award Number Error: ", e)
#     pass
# try:
#     contract.contract_value = i["contract_value"]
#     contract.save()
# except Exception as e:
#     print("Major Cmd Error: ", e)
#     pass
# try:
#     contract.unq_entity_id = i["unq_entity_id"]
#     contract.save()
# except Exception as e:
#     print("Major Cmd Error: ", e)
#     pass
# Final dump of all per-contract detail dicts collected this run.
with open('final_results.json', 'w') as fp:
    json.dump(base_data, fp)

View File

@@ -75,15 +75,14 @@ for hE in houseEvents:
event_type = 'Gv', event_type = 'Gv',
show_title = title, show_title = title,
show_link = link, show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT), show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT).date(),
# more_details = details['location'], # more_details = details['location'],
venue = venue, venue = venue,
scraper = scraper scraper = scraper
) )
if type(new_event) is tuple: if type(new_event) is tuple:
print("STS: ", new_event) print("HOUSE: ", new_event)
add_calendar(new_event[0], 'msp') add_calendar(new_event[0], 'msp')
else: else:
add_calendar(new_event, 'msp') add_calendar(new_event, 'msp')
@@ -112,8 +111,7 @@ for sE in senateEvents:
event_type = 'Gv', event_type = 'Gv',
show_title = title, show_title = title,
show_link = link, show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT), show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT).date(),
# more_details = details['location'], # more_details = details['location'],
venue = venue, venue = venue,
scraper = scraper scraper = scraper
@@ -146,14 +144,13 @@ for cE in commEvents:
event_type = 'Gv', event_type = 'Gv',
show_title = title, show_title = title,
show_link = link, show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT), show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT).date(),
# more_details = details['location'], # more_details = details['location'],
venue = venue, venue = venue,
scraper = scraper scraper = scraper
) )
if type(new_event) is tuple: if type(new_event) is tuple:
print("STS: ", new_event) print("COMMS: ", new_event)
add_calendar(new_event[0], 'msp') add_calendar(new_event[0], 'msp')
else: else:
add_calendar(new_event, 'msp') add_calendar(new_event, 'msp')

View File

@@ -66,7 +66,6 @@ def getEvents(br):
show_title = title, show_title = title,
show_link = link, show_link = link,
show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT), show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT),
venue = venue, venue = venue,
scraper = scraper scraper = scraper
) )

72
Working/venues/getMaps.py Normal file
View File

@@ -0,0 +1,72 @@
import os, sys
from datetime import datetime, timedelta
from dateutil import relativedelta
from pprint import pprint as ppr
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from selenium.webdriver.common.by import By
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
import re
def getSite(br, website, org):
    # Open a Google search results page for *org* and store the first Maps
    # result link on org.barrio.
    # NOTE(review): "luibr" looks like a scraped Google-internal class name —
    # brittle, confirm it still matches current markup.
    ps = digitools.getSource(br, website)
    links = ps.xpath('.//*/div[@class="luibr"]/div/div/a/@href')
    ppr(links)
    nsite = "https://www.google.com"
    if len(links) > 0:
        # Result hrefs are site-relative; prefix the Google origin.
        nlink = nsite + links[0]
        org.barrio = nlink
        org.save()
    # Politeness delay between lookups.
    sleep(5)
# Pick the browser run environment from argv; abort when none is given.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # FIX: the original called br.close() on this path, but br is never
    # created when no argument is supplied, so the error exit itself raised
    # a NameError instead of quitting cleanly.
    print("No run_env")
    quit()
orgs = Organization.objects.all()
# For each organization with a stored Google-Maps link (.barrio), open the
# link, let Google redirect to the canonical ".../@<lat>,<lng>,<zoom>" URL,
# and persist the extracted coordinates.
# NOTE(review): the [5:] slice skips the first five orgs — presumably already
# processed in an earlier run; confirm before re-running.
for org in orgs[5:]:
    try:
        br.get(org.barrio)
        org.gmap_link = org.barrio
        sleep(5)
        current_url = br.current_url
        # Canonical Maps URLs embed "@<lat>,<lng>"; capture both numbers.
        match = re.search(r'@(-?\d+\.\d+),(-?\d+\.\d+)', current_url)
        if match is None:
            # FIX: the original indexed the match unconditionally and relied
            # on the broad except to swallow the resulting TypeError.
            print("no coordinates in: ", current_url)
            continue
        print(match[1], match[2])
        org.latitude = match[1].strip()
        # FIX: was match[0].split(",")[1] — same value, but group 2 IS the
        # longitude capture; use it directly.
        org.longitude = match[2].strip()
        org.save()
    except Exception as e:
        ppr(org)
        print(e)
br.close()

View File

@@ -44,7 +44,7 @@ def get_events(ps, event_type):
try: try:
event['support'] = c.xpath('.//p[@class="fs-12 supporting-talent"]/text()')[0] event['support'] = c.xpath('.//p[@class="fs-12 supporting-talent"]/text()')[0]
except: except:
pass continue
event['venue'] = c.xpath('.//p[@class="fs-12 venue"]/text()')[0] event['venue'] = c.xpath('.//p[@class="fs-12 venue"]/text()')[0]
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT) event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
event['link'] = venue.website event['link'] = venue.website

View File

@@ -49,31 +49,33 @@ def getLinks(br, url, links):
div = br.find_element(By.XPATH, ".//*/div[@class='feed-pagination flexify']/a") div = br.find_element(By.XPATH, ".//*/div[@class='feed-pagination flexify']/a")
div.click() div.click()
sleep(2) sleep(2)
br.execute_script("window.scrollTo(0, window.scrollY + 1100)") br.execute_script("window.scrollTo(0, window.scrollY + 1375)")
sleep(2) sleep(2)
except: except:
x = 0 x = 0
ps = html.fromstring(br.page_source) ps = html.fromstring(br.page_source)
newLinks = [] newLinks = []
# newlinks = ps.xpath('.//*/div[@class="e-con-inner"]/*/a/@href') # newlinks = ps.xpath('.//*/div[@class="e-con-inner"]/*/a/@href')
events = ps.xpath('.//*/div[@class="e-con-inner"]') events = ps.xpath('.//*/div[@class="ts-preview"]')
for event in events: for event in events:
ev = {} ev = {}
try: try:
ev['link'] = event.xpath('.//*/a/@href')[0] ev['link'] = event.xpath('.//*/h3/a/@href')[0]
ev['title'] = event.xpath('.//*/h3/a/text()')[0] ev['title'] = event.xpath('.//*/h3/a/text()')[0]
ev['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0].replace('\n', '').replace('\t', '') ev['venue'] = event.xpath('.//*/ul/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/a[@class="ts-action-con"]/text()')[-1:][0].replace('\n', '').replace('\t', '')
# e['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0] ev['venueLink'] = event.xpath('.//*/ul/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/a[@class="ts-action-con"]/@href')[-1:][0]
ev['venueLink'] = event.xpath('.//*/ul/li/a/@href')[1] # ev['venueLink'] = event.xpath('.//*/ul/li/a/@href')[1]
label= event.xpath('.//*/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/div/text()') label= event.xpath('.//*/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/div/text()')
ev['label'] = ''.join([x.replace('\t', '').replace('\n', '') for x in label]).strip() ev['label'] = ''.join([x.replace('\t', '').replace('\n', '') for x in label]).strip()
newLinks.append(ev) newLinks.append(ev)
# ppr(ev)
except Exception as e: except Exception as e:
print("Error: ", ev, e) print("Error: ", ev, e)
links = links + newLinks links = links + newLinks
return links return links
if len(sys.argv) >= 2: if len(sys.argv) >= 2:
arg1 = sys.argv[1] arg1 = sys.argv[1]
br = digitools.getBrowser(arg1) br = digitools.getBrowser(arg1)
@@ -89,11 +91,15 @@ for url in urls:
allLinks = getLinks(br, url, allLinks) allLinks = getLinks(br, url, allLinks)
totalLinks = list({v['title']:v for v in allLinks}.values()) totalLinks = list({v['title']:v for v in allLinks}.values())
ppr(totalLinks)
ppr(len(totalLinks)) ppr(len(totalLinks))
# sortedlinks = allLinks.sort() # sortedlinks = allLinks.sort()
# ppr(sortedlinks) # ppr(sortedlinks)
for event in totalLinks: # quit()
for event in totalLinks[175:250]:
br.get(event['link']) br.get(event['link'])
sleep(1) sleep(1)
ps = html.fromstring(br.page_source) ps = html.fromstring(br.page_source)
@@ -156,6 +162,10 @@ for d in new_data:
cal = Calendar.objects.get(shortcode='mde') cal = Calendar.objects.get(shortcode='mde')
print("run paisa")
ppr(paisa)
for d in paisa: for d in paisa:
try: try:
nvenue, created = Organization.objects.get_or_create( nvenue, created = Organization.objects.get_or_create(
@@ -170,17 +180,20 @@ for d in paisa:
nvenue.address_complete = d['address'] nvenue.address_complete = d['address']
nvenue.save() nvenue.save()
try:
new_event, created = Event.objects.update_or_create( new_event, created = Event.objects.update_or_create(
event_type = d['category'], event_type = d['category'],
show_title = d['title'], show_title = d['title'],
show_link = d['link'], show_link = d['link'],
show_date = d['dateStamp'], show_date = d['dateStamp'],
show_day = d['dateStamp'],
scraper = scraper, scraper = scraper,
venue = nvenue venue = nvenue
) )
new_event.calendar.add(cal) new_event.calendar.add(cal)
new_event.save() new_event.save()
print(new_event, created, new_event.scraper)
# print("Success:", new_event) # print("Success:", new_event)
except Exception as e:
print(e)
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)

View File

@@ -81,11 +81,11 @@ for event in events:
show_title = event["show_title"], show_title = event["show_title"],
show_link = event["link"], show_link = event["link"],
show_date = event["date_time"], show_date = event["date_time"],
show_day = event["date_time"],
guests = " ".join(event["subtitle"]), guests = " ".join(event["subtitle"]),
venue = venue venue = venue
) )
digitools.add_calendar(new_event, 'msp') digitools.add_calendar(new_event, 'msp')
ppr(new_event)
except Exception as e: except Exception as e:
print("oops ", e, "\n\n", "Scraper:", scraper) print("oops ", e, "\n\n", "Scraper:", scraper)

View File

@@ -55,7 +55,6 @@ def get_events(ps, event_type):
show_title = event['title'], show_title = event['title'],
show_link = event['link'], show_link = event['link'],
show_date = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT), show_date = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
show_day = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
more_details = event["details"], more_details = event["details"],
venue = venue venue = venue
) )
@@ -71,7 +70,6 @@ def get_events(ps, event_type):
show_title = event['title'], show_title = event['title'],
show_link = event['link'], show_link = event['link'],
show_date = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT), show_date = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
show_day = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
more_details = event["details"], more_details = event["details"],
venue = venue venue = venue
) )

View File

@@ -27,7 +27,7 @@ scraper,item_count_start, virtcal = digitools.getScraper(venue, venue.website, '
tz_str = " -0600 UTC" tz_str = " -0600 UTC"
DATETIME_FORMAT = '%a %B %d @ %I:%M %p %Y %z %Z' DATETIME_FORMAT = '%a %B %d @ %I:%M %p %Y %z %Z'
DATETIME_FORMAT_2 = '%a %B %d, %Y @ %I:%M %p %z %Z' DATETIME_FORMAT_2 = '%a %B %d @ %I:%M %p %Y %z %Z'
calendar_url = 'https://noboolpresents.com/venues/uptown-vfw/' calendar_url = 'https://noboolpresents.com/venues/uptown-vfw/'
current_year = str(datetime.now().year) current_year = str(datetime.now().year)
@@ -50,32 +50,33 @@ def getEvents(br):
title = event.xpath('.//*/h2[@class="alt-font"]/a/text()')[0].replace("\n", "").replace("\t", "") title = event.xpath('.//*/h2[@class="alt-font"]/a/text()')[0].replace("\n", "").replace("\t", "")
link = event.xpath('.//*/h2[@class="alt-font"]/a/@href')[0] link = event.xpath('.//*/h2[@class="alt-font"]/a/@href')[0]
try: try:
print("First Try", dateTime)
new_event, created = Event.objects.update_or_create( new_event, created = Event.objects.update_or_create(
scraper = scraper, scraper = scraper,
event_type = 'Mu', event_type = 'Mu',
show_title = title, show_title = title,
show_link = link, show_link = link,
show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT), show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
venue = venue venue = venue
) )
digitools.add_calendar(new_event, 'msp') digitools.add_calendar(new_event, 'msp')
scraper.items+=1 scraper.items+=1
except Exception as e: except Exception as e:
try: try:
print("New Try", dateTime)
new_event, created = Event.objects.update_or_create( new_event, created = Event.objects.update_or_create(
scraper = scraper, scraper = scraper,
event_type = 'Mu', event_type = 'Mu',
show_title = title, show_title = title,
show_link = link, show_link = link,
show_date = datetime.strptime(dateTime[:-4].strip() + tz_str, DATETIME_FORMAT_2), show_date = datetime.strptime(dateTime.strip() + tz_str, DATETIME_FORMAT_2),
show_day = datetime.strptime(dateTime[:-4].strip() + tz_str, DATETIME_FORMAT_2),
venue = venue venue = venue
) )
digitools.add_calendar(new_event, 'msp') digitools.add_calendar(new_event, 'msp')
scraper.items+=1 scraper.items+=1
except Exception as e: except Exception as e:
print("oops", e, dateTime[:-4]) print("oops", e)
ppr(event)
getEvents(br) getEvents(br)
br.find_element(By.XPATH, './/*/li[@class="tribe-events-c-nav__list-item tribe-events-c-nav__list-item--next"]/a').click() br.find_element(By.XPATH, './/*/li[@class="tribe-events-c-nav__list-item tribe-events-c-nav__list-item--next"]/a').click()

View File

@@ -0,0 +1,68 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
from atproto import Client, client_utils
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from contracts.models import Contract
from events.digitools import getBrowser, createURL, createBasicEvent, getSource
tz = pytz.timezone("US/Central")
# SECURITY: Bluesky credentials were previously committed here in plain text
# (two account/password pairs) — treat them as compromised and rotate them.
# Credentials are now read from the environment; the literals remain only as
# a fallback so the existing deployment keeps working until rotation.
USERNAME = os.environ.get("BSKY_USERNAME", "digisnaxx.bsky.social")
PASSWORD = os.environ.get("BSKY_PASSWORD", "xv57-clj3-sjpw-iz5o")
client = Client()
client.login(USERNAME, PASSWORD)
# Post each contract to Bluesky, newest first, skipping the 35 most recent
# (presumably already posted — TODO confirm against the account feed).
contracts = Contract.objects.all().order_by('-pub_date')[35:]
for c in contracts:
    URL = "https://www.digisnaxx.com/contract/" + str(c.id)
    # NOTE(review): URL_TITLE / URL_DESCRIPTION are never formatted or used.
    URL_TITLE = "DoD Contract {0}"
    URL_DESCRIPTION = "DigiSnaxx Link to DoD Contract {0}"
    try:
        # contract_value uses the 4-char placeholder "naan" when unknown, so
        # len > 4 means a real dollar figure and len == 4 means "no value".
        # NOTE(review): a genuine 4-character value would be misclassified,
        # and values shorter than 4 chars fall through to the final `continue`.
        # NOTE(review): this reads c.us_dept_sub_tier while the scraper writes
        # us_dept_subtier — verify the actual model field name.
        if len(c.contract_value) > 4 and c.company != None:
            POST_TEXT = "\n\n{0} / {1}\n\nCompany: {2}\n\n{3}\n\nNAICS Code: {4}".format(c.us_dept, c.us_dept_sub_tier, c.company.name, c.contract_value, c.naics_code)
            ALT_TEXT = " ID {0}. For the {1} sub-tier {2}. Contract awarded to {3}.".format(c.notice_id, c.us_dept, c.us_dept_sub_tier, c.company.name)
        elif len(c.contract_value) == 4 and c.company != None:
            POST_TEXT = "\n\n{0} / {1}\n\nCompany: {2}\n\nNAICS Code: {3}".format(c.us_dept, c.us_dept_sub_tier, c.company.name, c.naics_code)
            ALT_TEXT = " ID {0}. For the {1} sub-tier {2}. Contract awarded to {3}. {4}".format(c.notice_id, c.us_dept, c.us_dept_sub_tier, c.company.name, c.naics_code)
        elif len(c.contract_value) > 4 and c.company == None:
            POST_TEXT = "\n\n{0} / {1}\n\n{2}\n\nNAICS Code: {3}".format(c.us_dept, c.us_dept_sub_tier, c.contract_value, c.naics_code)
            ALT_TEXT = " ID {0}. For the {1} sub-tier {2}. {3}. There is no company.".format(c.notice_id, c.us_dept, c.us_dept_sub_tier, c.naics_code)
        elif len(c.contract_value) == 4 and c.company == None:
            POST_TEXT = "\n\n{0} / {1}\n\nNAICS Code: {2}".format(c.us_dept, c.us_dept_sub_tier, c.naics_code)
            ALT_TEXT = "ID {0}. For the {1} sub-tier {2}. There is no company.".format(c.notice_id, c.us_dept, c.us_dept_sub_tier)
        else:
            continue
        # Hashtags: a fixed #DoDContracts tag plus the (truncated) notice id.
        text = client_utils.TextBuilder().tag("#DoDContracts ", "DoDContracts").tag("#"+c.notice_id[:16], c.notice_id[:16],).text(POST_TEXT + "\n\n").link(URL, URL)
        print(POST_TEXT)
        ppr(c)
        client.send_post(text=text)
        # Throttle posting to avoid rate limits.
        sleep(25)
    except Exception as e:
        print(c.notice_id)
        print("Error: ", e)
# feed = client.get_author_feed(USERNAME, limit = 100)