Para extraer los enlaces de una página web usaremos expresiones regulares.
Primero tendremos que descargar el contenido de la página para poder analizarlo.
En C#:
--------------------------------
using System;
using System.Data;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
/// <summary>
/// Downloads the document at <paramref name="url"/> and returns the value of
/// every <c>href</c> attribute found in it.
/// </summary>
/// <param name="url">Address of the document to download and scan.</param>
/// <returns>
/// An <see cref="ArrayList"/> of strings, one per <c>href</c> attribute, in
/// document order and with the original letter case preserved. Empty when the
/// document contains no <c>href</c> attributes.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
/// <exception cref="WebException">The download failed.</exception>
public ArrayList UrlList(string url)
{
    if (url == null)
    {
        throw new ArgumentNullException("url");
    }

    byte[] resultHTML;

    // WebClient is IDisposable; release it as soon as the download finishes.
    using (WebClient myWebClient = new WebClient())
    {
        resultHTML = myWebClient.DownloadData(url);
    }

    // The payload is decoded as UTF-8 regardless of what the server declares.
    string myResultString = Encoding.UTF8.GetString(resultHTML);

    // The attribute value is captured in the "url" group whether it is
    // double-quoted, single-quoted or unquoted. IgnoreCase lets "HREF" match
    // without lower-casing the document, which would corrupt case-sensitive URLs.
    Regex hrefPattern = new Regex(
        @"href\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s""'>]+))",
        RegexOptions.IgnoreCase);

    ArrayList linksArray = new ArrayList();
    foreach (Match match in hrefPattern.Matches(myResultString))
    {
        // Only the captured value is a link; group 0 is the whole "href=..." text.
        linksArray.Add(match.Groups["url"].Value);
    }

    return linksArray;
}
En Visual Basic:
--------------------------------
Imports System
Imports System.Collections
Imports System.Data
Imports System.Net
Imports System.Text
Imports System.Text.RegularExpressions
' Downloads a page and appends the value of every href attribute it contains
' to arrLinks. UrlLocal, url and arrLinks are declared outside this fragment.
Dim resultHTML As Byte()

' WebClient is IDisposable; release it as soon as the download finishes.
Using myWebClient As New Net.WebClient
    ' NOTE(review): when url differs from UrlLocal it is presumably a path
    ' relative to UrlLocal, so the two are joined with "/" - confirm with caller.
    If UrlLocal <> url Then
        resultHTML = myWebClient.DownloadData(UrlLocal & "/" & url)
    Else
        resultHTML = myWebClient.DownloadData(url)
    End If
End Using

' The payload is decoded as UTF-8 regardless of what the server declares.
Dim myResultString As String = Encoding.UTF8.GetString(resultHTML)

' The attribute value is captured in the "url" group whether it is
' double-quoted, single-quoted or unquoted. IgnoreCase lets "HREF" match
' without lower-casing the document, which would corrupt case-sensitive URLs.
Dim regularexpre As New Regex( _
    "href\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s""'>]+))", _
    RegexOptions.IgnoreCase)
Dim collectionUrls As MatchCollection = regularexpre.Matches(myResultString)

For Each res As Match In collectionUrls
    ' Only the captured value is a link; group 0 is the whole "href=..." text.
    arrLinks.Add(res.Groups("url").Value)
Next
Código fuente extraído de www.programar.net
Los trucos Visual Basic.NET más vistos
Este truco ha recibido 3 votaciones | Nota media: 5
Selecciona tu puntuación: