Hosting domitienda.com
MisTrucos.Net - El rincón de los trucos informáticos El rincón de los trucos informáticos
Trucos Visual Basic.NET > Extraer enlaces de una página Web
Para extraer los enlaces de una página web usaremos expresiones regulares.

Primero tendremos que descargar la página web para poder analizarla.

En C#:
--------------------------------
using System;
using System.Data;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;


/// <summary>
/// Downloads the document at <paramref name="url"/> and returns the value of every
/// <c>href</c> attribute found in it.
/// </summary>
/// <param name="url">Address of the page to download and scan.</param>
/// <returns>
/// An <see cref="ArrayList"/> of strings, one per <c>href</c> attribute, in document order.
/// Values keep their original casing and are not resolved against the page address.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
/// <exception cref="WebException">The download failed.</exception>
public ArrayList UrlList(string url)
{
    if (url == null)
    {
        throw new ArgumentNullException("url");
    }

    byte[] resultHTML;

    // WebClient is IDisposable; release it as soon as the download finishes.
    using (WebClient myWebClient = new WebClient())
    {
        resultHTML = myWebClient.DownloadData(url);
    }

    // The page is decoded as UTF-8, as in the original snippet.
    string myResultString = Encoding.UTF8.GetString(resultHTML);

    // Matches href="...", href='...' and unquoted href=... values; the value is always
    // captured in the named group "url". IgnoreCase replaces the former ToLower() call,
    // which also lower-cased the URLs themselves and broke case-sensitive paths.
    Regex regularexpre = new Regex(
        "href\\s*=\\s*(?:\"(?<url>[^\"]*)\"|'(?<url>[^']*)'|(?<url>[^\\s>\"']+))",
        RegexOptions.IgnoreCase);

    ArrayList linksArray = new ArrayList();

    foreach (Match res in regularexpre.Matches(myResultString))
    {
        // Only the captured URL is stored; group 0 is the whole "href=..." text.
        linksArray.Add(res.Groups["url"].Value);
    }

    return linksArray;
}

En Visual Basic:
--------------------------------
Imports System
Imports System.Collections
Imports System.Data
Imports System.Net
Imports System.Text
Imports System.Text.RegularExpressions

' Downloads a page and appends the value of every href attribute it contains to arrLinks.
' UrlLocal, url and arrLinks are declared outside this fragment.
Dim resultHTML As Byte()

' WebClient is IDisposable; Using releases it as soon as the download finishes.
Using myWebClient As New Net.WebClient
    If UrlLocal <> url Then
        ' url differs from UrlLocal: request it as a path under UrlLocal.
        resultHTML = myWebClient.DownloadData(UrlLocal & "/" & url)
    Else
        resultHTML = myWebClient.DownloadData(url)
    End If
End Using

' The page is decoded as UTF-8.
Dim utf8Enc As New UTF8Encoding
Dim myResultString As String = utf8Enc.GetString(resultHTML)

' Matches href="...", href='...' and unquoted href=... values; the value is always
' captured in the named group "url". IgnoreCase replaces the former ToLower() call,
' which also lower-cased the URLs themselves and broke case-sensitive paths.
Dim regularexpre As New Regex("href\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s>""']+))", RegexOptions.IgnoreCase)

Dim collectionUrls As MatchCollection
collectionUrls = regularexpre.Matches(myResultString)

For Each res As Match In collectionUrls
    ' Only the captured URL is stored; group 0 is the whole "href=..." text.
    arrLinks.Add(res.Groups("url").Value)
Next

Código fuente extraído de www.programar.net