Szathmáry László honlapja @ DEIK | Nim2 / Extract links from a webpage

Fetch the HTML source of a webpage (as a string), and extract all the links.

$ nimble install htmlparser

import std/httpclient
import std/xmltree
import std/strtabs

import pkg/htmlparser # 3rd-party library

proc get_page(url: string): string =
  ## Fetches `url` with a synchronous `HttpClient` and returns the response
  ## body as a string.
  ##
  ## When `getContent` raises `HttpRequestError`, the error message is written
  ## to stderr and an empty string is returned. Any other exception type is
  ## not handled here and propagates to the caller.
  ##
  ## NOTE(review): per the std/httpclient docs, `https` URLs need the program
  ## to be compiled with `-d:ssl` -- confirm the build flags.
  let client = newHttpClient()
  try:
    result = client.getContent(url)
  except HttpRequestError as e:
    stderr.writeLine("Error: ", e.msg)
    result = ""
  finally:
    # Release the underlying connection on every path; the client was
    # previously never closed.
    client.close()

proc extractLinks(html: string): seq[string] =
  ## Parses `html` and returns the `href` value of every `<a>` element that
  ## carries an `href` attribute.
  ##
  ## If a catchable error is raised while parsing or collecting, its message
  ## is written to stderr and the links gathered up to that point (possibly
  ## none) are returned.
  try:
    let doc = parseHtml(html)
    for a in doc.findAll("a"):
      # `attrs` returns nil when the node's attributes were never initialised
      # (e.g. an `<a>` without any attributes); a membership test on nil
      # would dereference nil, so guard before looking up "href".
      let attributes = a.attrs
      if not attributes.isNil and "href" in attributes:
        result.add(attributes["href"])
  except CatchableError as e:
    # Catch only recoverable errors so that `Defect`s (programmer bugs) are
    # not silently swallowed. Report on stderr so the diagnostic does not mix
    # with the links printed on stdout.
    stderr.writeLine("Error: ", e.msg)

proc main() =
  ## Downloads the hard-coded wiki page, extracts its links and prints each
  ## one on its own line.
  const pageUrl = "https://arato.inf.unideb.hu/szathmary.laszlo/pmwiki/index.php?n=Acad.Nim2"
  let pageSource = get_page(pageUrl)
  let hrefs = extractLinks(pageSource)

  for href in hrefs:
    echo href

# ##########

# Entry point: call main() only when this file is compiled as the main module.
when isMainModule: main()

Output:

...
https://arato.inf.unideb.hu/szathmary.laszlo/pmwiki/index.php?n=Nim2.20260407a
https://arato.inf.unideb.hu/szathmary.laszlo/pmwiki/index.php?n=Nim2.20260406g
https://arato.inf.unideb.hu/szathmary.laszlo/pmwiki/index.php?n=Nim2.20260407d
...