|
Oktatás * Programozás 1 * Szkriptnyelvek Teaching * Programming 1 (BI) Félévek Linkek * kalendárium |
Nim2 /
Extract links from a webpage

This time let's do it with regular expressions.

Core part

```nim
import std/re

proc extract_links(html: string): seq[string] =
  let pattern = re"https?://(?:[a-zA-Z0-9\$-_@.&+!*\(\),]|%[0-9a-fA-F]{2})+"
  findAll(html, pattern)
```

Complete example

```nim
import std/strutils # strip, split, join
import std/httpclient
import std/re

proc get_page(url: string): string =
  let client = newHttpClient()
  try:
    client.getContent(url)
  except HttpRequestError as e:
    stderr.writeLine("Error: ", e.msg)
    ""

proc extract_links(html: string): seq[string] =
  let pattern = re"https?://(?:[a-zA-Z0-9\$-_@.&+!*\(\),]|%[0-9a-fA-F]{2})+"
  findAll(html, pattern)

proc main() =
  let
    url = "https://www.bing.com"
    html = get_page(url)
    urls = extract_links(html)

  for url in urls:
    echo url

  # for url in urls:
  #   if ".jpg" in url:
  #     echo url.split("&")[0]
  #     break

main()
```

Output:

```
https://www.bing.com/th?id=OHR.Kofa_ROW0914409827_tmb.jpg&rf=
https://www.bing.com/?form=HPFBBK&ssd=20260507_0700&mkt=en-WW
https://www.bing.com/
https://r.bing.com
...
```
|
![]() Blogjaim, hobbi projektjeim * The Ubuntu Incident [ edit ] |