Skip to content

Commit

Permalink
MS Events update and improvement for 2025
Browse files Browse the repository at this point in the history
  • Loading branch information
braykuka committed Jan 21, 2025
1 parent e89aeb0 commit 755ad9d
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions scrapers/ms/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class SenateAgenda(HtmlPage):

def process_page(self):
    """Yield every event parsed from the Senate committee agenda PDF.

    Builds the link to the agenda PDF and hands it to
    ``SenateAgendaPdf``, re-yielding whatever that page's
    ``do_scrape()`` produces.
    """
    # Exactly one URL literal may live inside these parentheses: Python
    # joins adjacent string literals, so leaving the previous year's link
    # next to the current one would silently produce a single bogus URL.
    # NOTE(review): the file name embeds the year, so this link presumably
    # needs to be refreshed for each new session -- confirm.
    pdf_link = (
        "https://legislature.ms.gov/media/1151/2025_SENATE_COMMITTEE_AGENDAS.pdf"
    )
    yield from SenateAgendaPdf(source=pdf_link).do_scrape()

Expand All @@ -36,7 +36,6 @@ def process_page(self):
class SenateAgendaPdf(PdfPage):
def process_page(self):
event = None

# Strip all lines and remove empty lines
lines = [line.strip() for line in self.text.splitlines() if line.strip()]

Expand Down Expand Up @@ -104,7 +103,9 @@ def scrape_senate(self):
return SenateAgenda().do_scrape()

def scrape_house(self):
event_url = "https://billstatus.ls.state.ms.us/htms/h_sched.htm"
event_url = (
"https://www.legislature.ms.gov/calendars-and-schedules/house-calendar/"
)
text = self.get(event_url).text
event = None
when, time, room, com, desc = None, None, None, None, None
Expand All @@ -121,7 +122,7 @@ def scrape_house(self):
alpha = alpha.replace(" ", "").replace(".", "")
bill = f"{alpha} {num}"
bills_seen.add(bill)

print("alpha", alpha, "bill", bill)
if re.match(
r"^(MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY|SATURDAY|SUNDAY)",
line,
Expand Down Expand Up @@ -203,7 +204,7 @@ def scrape_house(self):
# Reset bills_seen so subsequent events don't get bills
# from previous events
bills_seen = set()

print("event=>", event)
yield event

def is_com(self, event_name):
Expand Down

0 comments on commit 755ad9d

Please sign in to comment.