Read contents of a web page

  • Thread starter: Kuldeep
  • Start date
K

Kuldeep

Hi All,

I am trying to read the contents of a page through its URL.

My code snippet is as follows:
/// <summary>
/// Synchronously downloads http://www.microsoft.com and writes the body to
/// the current <c>Response</c>, one line at a time.
/// </summary>
/// <remarks>
/// <c>StreamReader.ReadLine</c> returns each line without its terminator, so
/// the lines are written back to back with no separators between them.
/// </remarks>
public void mtdGetPageDataHWR()
{
    HttpWebRequest objRequ =
        (HttpWebRequest)WebRequest.Create("http://www.microsoft.com");

    // Dispose the response and the reader deterministically; otherwise the
    // underlying connection stays open until the garbage collector runs.
    using (HttpWebResponse objResp = (HttpWebResponse)objRequ.GetResponse())
    using (StreamReader objRd = new StreamReader(objResp.GetResponseStream()))
    {
        string strRd;
        while ((strRd = objRd.ReadLine()) != null)
        {
            Response.Write(strRd);
        }
    }
}

Is there any other way to achieve this that would be more efficient or
faster?

Any help on this would be very handy

Thanks,

Kuldeep
 
You should use myHttpWebRequest.BeginGetResponse to do it asynchronously,
and retrieve the response with EndGetResponse.

Here is an example from MSDN on how to do that.


using System;
using System.Net;
using System.IO;
using System.Text;
using System.Threading;


/// <summary>
/// Mutable bag of per-request state that is handed from one asynchronous
/// callback to the next.
/// </summary>
public class RequestState
{
    // Size, in bytes, of the buffer each asynchronous read fills.
    const int BUFFER_SIZE = 1024;

    // Text accumulated from the response so far; starts out empty.
    public StringBuilder requestData = new StringBuilder(String.Empty);

    // Scratch buffer that receives the bytes of each read.
    public byte[] BufferRead = new byte[BUFFER_SIZE];

    // The request, its response and the response stream; all remain null
    // until the owner of this object assigns them.
    public HttpWebRequest request;
    public HttpWebResponse response;
    public Stream streamResponse;

    /// <summary>
    /// Creates a state object with an empty text accumulator and a freshly
    /// allocated read buffer.
    /// </summary>
    public RequestState()
    {
    }
}

/// <summary>
/// Console sample that downloads a page with the asynchronous
/// BeginGetResponse/EndGetResponse and BeginRead/EndRead pattern and prints
/// the body once the whole response has been read.
/// </summary>
class HttpWebRequest_BeginGetResponse
{
    // Signaled by the callbacks once the request has completed or failed;
    // Main blocks on it.
    public static ManualResetEvent allDone = new ManualResetEvent(false);

    // Number of bytes requested per BeginRead call. Must not exceed the
    // length of RequestState.BufferRead.
    const int BUFFER_SIZE = 1024;

    const int DefaultTimeout = 2 * 60 * 1000; // 2 minutes timeout

    /// <summary>
    /// Aborts the request if the registered wait timed out.
    /// </summary>
    /// <param name="state">The HttpWebRequest passed when the wait was registered.</param>
    /// <param name="timedOut">True when the wait ended because of the timeout.</param>
    private static void TimeoutCallback(object state, bool timedOut)
    {
        if (timedOut)
        {
            HttpWebRequest request = state as HttpWebRequest;
            if (request != null)
            {
                request.Abort();
            }
        }
    }

    /// <summary>
    /// Starts the asynchronous request, waits for the callbacks to signal
    /// completion and releases the response.
    /// </summary>
    static void Main()
    {
        RequestState myRequestState = new RequestState();
        RegisteredWaitHandle timeoutRegistration = null;

        try
        {
            // Create a HttpWebRequest object to the desired URL.
            HttpWebRequest myHttpWebRequest =
                (HttpWebRequest)WebRequest.Create("http://www.contoso.com");

            // If you are behind a firewall and you do not have your browser
            // proxy set up, you need to use the following proxy creation code:
            //
            //     // Create a proxy object.
            //     WebProxy myProxy = new WebProxy();
            //
            //     // Associate a new Uri object with the proxy object, using
            //     // the proxy address selected by the user.
            //     myProxy.Address = new Uri("http://myproxy");
            //
            //     // Finally, initialize the Web request object's Proxy
            //     // property with the proxy object.
            //     myHttpWebRequest.Proxy = myProxy;

            // Hand the request to the state object so the callbacks can reach it.
            myRequestState.request = myHttpWebRequest;

            // Start the asynchronous request.
            IAsyncResult result = myHttpWebRequest.BeginGetResponse(
                new AsyncCallback(RespCallback), myRequestState);

            // This implements the timeout: if the response has not arrived
            // within DefaultTimeout, TimeoutCallback fires and aborts the request.
            timeoutRegistration = ThreadPool.RegisterWaitForSingleObject(
                result.AsyncWaitHandle,
                new WaitOrTimerCallback(TimeoutCallback),
                myHttpWebRequest,
                DefaultTimeout,
                true);

            // Block until the callbacks report that processing has finished
            // (successfully or not).
            allDone.WaitOne();
        }
        catch (WebException e)
        {
            Console.WriteLine("\nMain Exception raised!");
            Console.WriteLine("\nMessage:{0}", e.Message);
            Console.WriteLine("\nStatus:{0}", e.Status);
            Console.WriteLine("Press any key to continue..........");
            // Actually wait, as the prompt says, so the message can be read.
            Console.Read();
        }
        catch (Exception e)
        {
            Console.WriteLine("\nMain Exception raised!");
            Console.WriteLine("Source :{0} ", e.Source);
            Console.WriteLine("Message :{0} ", e.Message);
            Console.WriteLine("Press any key to continue..........");
            Console.Read();
        }
        finally
        {
            // Cancel the timeout registration so it does not outlive the request.
            if (timeoutRegistration != null)
            {
                timeoutRegistration.Unregister(null);
            }

            // Release the HttpWebResponse resource. It is still null when the
            // request failed before a response was obtained.
            if (myRequestState.response != null)
            {
                myRequestState.response.Close();
            }
        }
    }

    /// <summary>
    /// Completes the asynchronous GetResponse call and starts the first
    /// asynchronous read of the response stream.
    /// </summary>
    /// <param name="asynchronousResult">Result whose AsyncState is the RequestState.</param>
    private static void RespCallback(IAsyncResult asynchronousResult)
    {
        // True once a read has been queued; from then on ReadCallBack is
        // responsible for signaling allDone.
        bool readStarted = false;

        try
        {
            // The state of the request travels in AsyncState.
            RequestState myRequestState = (RequestState)asynchronousResult.AsyncState;
            HttpWebRequest myHttpWebRequest = myRequestState.request;
            myRequestState.response =
                (HttpWebResponse)myHttpWebRequest.EndGetResponse(asynchronousResult);

            // Keep the response stream so ReadCallBack can continue reading it.
            Stream responseStream = myRequestState.response.GetResponseStream();
            myRequestState.streamResponse = responseStream;

            // Begin reading the contents of the HTML page; ReadCallBack
            // prints it to the console when the stream is exhausted.
            responseStream.BeginRead(
                myRequestState.BufferRead, 0, BUFFER_SIZE,
                new AsyncCallback(ReadCallBack), myRequestState);
            readStarted = true;
        }
        catch (WebException e)
        {
            Console.WriteLine("\nRespCallback Exception raised!");
            Console.WriteLine("\nMessage:{0}", e.Message);
            Console.WriteLine("\nStatus:{0}", e.Status);
        }
        finally
        {
            // No read is pending, so nothing else will release Main.
            if (!readStarted)
            {
                allDone.Set();
            }
        }
    }

    /// <summary>
    /// Completes one asynchronous read. Appends the bytes received and
    /// queues the next read, or, at end of stream, prints the accumulated
    /// page and signals completion.
    /// </summary>
    /// <param name="asyncResult">Result whose AsyncState is the RequestState.</param>
    private static void ReadCallBack(IAsyncResult asyncResult)
    {
        // True when another read has been queued and will signal allDone itself.
        bool readPending = false;

        try
        {
            RequestState myRequestState = (RequestState)asyncResult.AsyncState;
            Stream responseStream = myRequestState.streamResponse;
            int read = responseStream.EndRead(asyncResult);

            if (read > 0)
            {
                // NOTE: ASCII decoding turns every non-ASCII byte into '?'.
                myRequestState.requestData.Append(
                    Encoding.ASCII.GetString(myRequestState.BufferRead, 0, read));
                responseStream.BeginRead(
                    myRequestState.BufferRead, 0, BUFFER_SIZE,
                    new AsyncCallback(ReadCallBack), myRequestState);
                readPending = true;
                return;
            }

            // End of stream: release it before blocking on console input.
            responseStream.Close();

            Console.WriteLine("\nThe contents of the Html page are : ");
            if (myRequestState.requestData.Length > 0)
            {
                Console.WriteLine(myRequestState.requestData.ToString());
            }
            Console.WriteLine("Press any key to continue..........");
            Console.ReadLine();
        }
        catch (WebException e)
        {
            Console.WriteLine("\nReadCallBack Exception raised!");
            Console.WriteLine("\nMessage:{0}", e.Message);
            Console.WriteLine("\nStatus:{0}", e.Status);
        }
        catch (IOException e)
        {
            // A failed stream read surfaces as IOException; report it instead
            // of letting it escape from the callback thread.
            Console.WriteLine("\nReadCallBack Exception raised!");
            Console.WriteLine("\nMessage:{0}", e.Message);
        }
        finally
        {
            if (!readPending)
            {
                allDone.Set();
            }
        }
    }
}
 
Hi Sun,

Is there a method which could be as fast as a "Ctrl+F" on
a web page to achieve the same?
Or something that we could control through Javascript or any scripting
language for that matter?


Thanks for the response
Kuldeep


Jianwei Sun said:
You should use myHttpWebRequest.BeginGetResponse to do it asynchronous.
and retrieve the response from EndGetResponse.

Here is an example from MSDN on how to do that.


using System;
using System.Net;
using System.IO;
using System.Text;
using System.Threading;


/// <summary>
/// Mutable bag of per-request state that is handed from one asynchronous
/// callback to the next.
/// </summary>
public class RequestState
{
    // Size, in bytes, of the buffer each asynchronous read fills.
    const int BUFFER_SIZE = 1024;

    // Text accumulated from the response so far; starts out empty.
    public StringBuilder requestData = new StringBuilder(String.Empty);

    // Scratch buffer that receives the bytes of each read.
    public byte[] BufferRead = new byte[BUFFER_SIZE];

    // The request, its response and the response stream; all remain null
    // until the owner of this object assigns them.
    public HttpWebRequest request;
    public HttpWebResponse response;
    public Stream streamResponse;

    /// <summary>
    /// Creates a state object with an empty text accumulator and a freshly
    /// allocated read buffer.
    /// </summary>
    public RequestState()
    {
    }
}

/// <summary>
/// Console sample that downloads a page with the asynchronous
/// BeginGetResponse/EndGetResponse and BeginRead/EndRead pattern and prints
/// the body once the whole response has been read.
/// </summary>
class HttpWebRequest_BeginGetResponse
{
    // Signaled by the callbacks once the request has completed or failed;
    // Main blocks on it.
    public static ManualResetEvent allDone = new ManualResetEvent(false);

    // Number of bytes requested per BeginRead call. Must not exceed the
    // length of RequestState.BufferRead.
    const int BUFFER_SIZE = 1024;

    const int DefaultTimeout = 2 * 60 * 1000; // 2 minutes timeout

    /// <summary>
    /// Aborts the request if the registered wait timed out.
    /// </summary>
    /// <param name="state">The HttpWebRequest passed when the wait was registered.</param>
    /// <param name="timedOut">True when the wait ended because of the timeout.</param>
    private static void TimeoutCallback(object state, bool timedOut)
    {
        if (timedOut)
        {
            HttpWebRequest request = state as HttpWebRequest;
            if (request != null)
            {
                request.Abort();
            }
        }
    }

    /// <summary>
    /// Starts the asynchronous request, waits for the callbacks to signal
    /// completion and releases the response.
    /// </summary>
    static void Main()
    {
        RequestState myRequestState = new RequestState();
        RegisteredWaitHandle timeoutRegistration = null;

        try
        {
            // Create a HttpWebRequest object to the desired URL.
            HttpWebRequest myHttpWebRequest =
                (HttpWebRequest)WebRequest.Create("http://www.contoso.com");

            // If you are behind a firewall and you do not have your browser
            // proxy set up, you need to use the following proxy creation code:
            //
            //     // Create a proxy object.
            //     WebProxy myProxy = new WebProxy();
            //
            //     // Associate a new Uri object with the proxy object, using
            //     // the proxy address selected by the user.
            //     myProxy.Address = new Uri("http://myproxy");
            //
            //     // Finally, initialize the Web request object's Proxy
            //     // property with the proxy object.
            //     myHttpWebRequest.Proxy = myProxy;

            // Hand the request to the state object so the callbacks can reach it.
            myRequestState.request = myHttpWebRequest;

            // Start the asynchronous request.
            IAsyncResult result = myHttpWebRequest.BeginGetResponse(
                new AsyncCallback(RespCallback), myRequestState);

            // This implements the timeout: if the response has not arrived
            // within DefaultTimeout, TimeoutCallback fires and aborts the request.
            timeoutRegistration = ThreadPool.RegisterWaitForSingleObject(
                result.AsyncWaitHandle,
                new WaitOrTimerCallback(TimeoutCallback),
                myHttpWebRequest,
                DefaultTimeout,
                true);

            // Block until the callbacks report that processing has finished
            // (successfully or not).
            allDone.WaitOne();
        }
        catch (WebException e)
        {
            Console.WriteLine("\nMain Exception raised!");
            Console.WriteLine("\nMessage:{0}", e.Message);
            Console.WriteLine("\nStatus:{0}", e.Status);
            Console.WriteLine("Press any key to continue..........");
            // Actually wait, as the prompt says, so the message can be read.
            Console.Read();
        }
        catch (Exception e)
        {
            Console.WriteLine("\nMain Exception raised!");
            Console.WriteLine("Source :{0} ", e.Source);
            Console.WriteLine("Message :{0} ", e.Message);
            Console.WriteLine("Press any key to continue..........");
            Console.Read();
        }
        finally
        {
            // Cancel the timeout registration so it does not outlive the request.
            if (timeoutRegistration != null)
            {
                timeoutRegistration.Unregister(null);
            }

            // Release the HttpWebResponse resource. It is still null when the
            // request failed before a response was obtained.
            if (myRequestState.response != null)
            {
                myRequestState.response.Close();
            }
        }
    }

    /// <summary>
    /// Completes the asynchronous GetResponse call and starts the first
    /// asynchronous read of the response stream.
    /// </summary>
    /// <param name="asynchronousResult">Result whose AsyncState is the RequestState.</param>
    private static void RespCallback(IAsyncResult asynchronousResult)
    {
        // True once a read has been queued; from then on ReadCallBack is
        // responsible for signaling allDone.
        bool readStarted = false;

        try
        {
            // The state of the request travels in AsyncState.
            RequestState myRequestState = (RequestState)asynchronousResult.AsyncState;
            HttpWebRequest myHttpWebRequest = myRequestState.request;
            myRequestState.response =
                (HttpWebResponse)myHttpWebRequest.EndGetResponse(asynchronousResult);

            // Keep the response stream so ReadCallBack can continue reading it.
            Stream responseStream = myRequestState.response.GetResponseStream();
            myRequestState.streamResponse = responseStream;

            // Begin reading the contents of the HTML page; ReadCallBack
            // prints it to the console when the stream is exhausted.
            responseStream.BeginRead(
                myRequestState.BufferRead, 0, BUFFER_SIZE,
                new AsyncCallback(ReadCallBack), myRequestState);
            readStarted = true;
        }
        catch (WebException e)
        {
            Console.WriteLine("\nRespCallback Exception raised!");
            Console.WriteLine("\nMessage:{0}", e.Message);
            Console.WriteLine("\nStatus:{0}", e.Status);
        }
        finally
        {
            // No read is pending, so nothing else will release Main.
            if (!readStarted)
            {
                allDone.Set();
            }
        }
    }

    /// <summary>
    /// Completes one asynchronous read. Appends the bytes received and
    /// queues the next read, or, at end of stream, prints the accumulated
    /// page and signals completion.
    /// </summary>
    /// <param name="asyncResult">Result whose AsyncState is the RequestState.</param>
    private static void ReadCallBack(IAsyncResult asyncResult)
    {
        // True when another read has been queued and will signal allDone itself.
        bool readPending = false;

        try
        {
            RequestState myRequestState = (RequestState)asyncResult.AsyncState;
            Stream responseStream = myRequestState.streamResponse;
            int read = responseStream.EndRead(asyncResult);

            if (read > 0)
            {
                // NOTE: ASCII decoding turns every non-ASCII byte into '?'.
                myRequestState.requestData.Append(
                    Encoding.ASCII.GetString(myRequestState.BufferRead, 0, read));
                responseStream.BeginRead(
                    myRequestState.BufferRead, 0, BUFFER_SIZE,
                    new AsyncCallback(ReadCallBack), myRequestState);
                readPending = true;
                return;
            }

            // End of stream: release it before blocking on console input.
            responseStream.Close();

            Console.WriteLine("\nThe contents of the Html page are : ");
            if (myRequestState.requestData.Length > 0)
            {
                Console.WriteLine(myRequestState.requestData.ToString());
            }
            Console.WriteLine("Press any key to continue..........");
            Console.ReadLine();
        }
        catch (WebException e)
        {
            Console.WriteLine("\nReadCallBack Exception raised!");
            Console.WriteLine("\nMessage:{0}", e.Message);
            Console.WriteLine("\nStatus:{0}", e.Status);
        }
        catch (IOException e)
        {
            // A failed stream read surfaces as IOException; report it instead
            // of letting it escape from the callback thread.
            Console.WriteLine("\nReadCallBack Exception raised!");
            Console.WriteLine("\nMessage:{0}", e.Message);
        }
        finally
        {
            if (!readPending)
            {
                allDone.Set();
            }
        }
    }
}
Hi All,

I am trying to read the contents of a page through its URL.

My code snippet is as follows:
/// <summary>
/// Synchronously downloads http://www.microsoft.com and writes the body to
/// the current <c>Response</c>, one line at a time.
/// </summary>
/// <remarks>
/// <c>StreamReader.ReadLine</c> returns each line without its terminator, so
/// the lines are written back to back with no separators between them.
/// </remarks>
public void mtdGetPageDataHWR()
{
    HttpWebRequest objRequ =
        (HttpWebRequest)WebRequest.Create("http://www.microsoft.com");

    // Dispose the response and the reader deterministically; otherwise the
    // underlying connection stays open until the garbage collector runs.
    using (HttpWebResponse objResp = (HttpWebResponse)objRequ.GetResponse())
    using (StreamReader objRd = new StreamReader(objResp.GetResponseStream()))
    {
        string strRd;
        while ((strRd = objRd.ReadLine()) != null)
        {
            Response.Write(strRd);
        }
    }
}

Is there any other way to achieve this which could be more efficient or
faster than this.

Any help on this would be very handy

Thanks,

Kuldeep
 
If I understand correctly, you are looking for client-side functionality;
in that case this is really the wrong group for this question.
Hi Sun,

Is there a method which could be as fast as a "Ctrl+F" on
a web page to achieve the same?
Or something that we could control through Javascript or any scripting
language for that matter?


Thanks for the response
Kuldeep


Jianwei Sun said:
You should use myHttpWebRequest.BeginGetResponse to do it asynchronous.
and retrieve the response from EndGetResponse.

Here is an example from MSDN on how to do that.


using System;
using System.Net;
using System.IO;
using System.Text;
using System.Threading;


/// <summary>
/// Mutable bag of per-request state that is handed from one asynchronous
/// callback to the next.
/// </summary>
public class RequestState
{
    // Size, in bytes, of the buffer each asynchronous read fills.
    const int BUFFER_SIZE = 1024;

    // Text accumulated from the response so far; starts out empty.
    public StringBuilder requestData = new StringBuilder(String.Empty);

    // Scratch buffer that receives the bytes of each read.
    public byte[] BufferRead = new byte[BUFFER_SIZE];

    // The request, its response and the response stream; all remain null
    // until the owner of this object assigns them.
    public HttpWebRequest request;
    public HttpWebResponse response;
    public Stream streamResponse;

    /// <summary>
    /// Creates a state object with an empty text accumulator and a freshly
    /// allocated read buffer.
    /// </summary>
    public RequestState()
    {
    }
}

/// <summary>
/// Console sample that downloads a page with the asynchronous
/// BeginGetResponse/EndGetResponse and BeginRead/EndRead pattern and prints
/// the body once the whole response has been read.
/// </summary>
class HttpWebRequest_BeginGetResponse
{
    // Signaled by the callbacks once the request has completed or failed;
    // Main blocks on it.
    public static ManualResetEvent allDone = new ManualResetEvent(false);

    // Number of bytes requested per BeginRead call. Must not exceed the
    // length of RequestState.BufferRead.
    const int BUFFER_SIZE = 1024;

    const int DefaultTimeout = 2 * 60 * 1000; // 2 minutes timeout

    /// <summary>
    /// Aborts the request if the registered wait timed out.
    /// </summary>
    /// <param name="state">The HttpWebRequest passed when the wait was registered.</param>
    /// <param name="timedOut">True when the wait ended because of the timeout.</param>
    private static void TimeoutCallback(object state, bool timedOut)
    {
        if (timedOut)
        {
            HttpWebRequest request = state as HttpWebRequest;
            if (request != null)
            {
                request.Abort();
            }
        }
    }

    /// <summary>
    /// Starts the asynchronous request, waits for the callbacks to signal
    /// completion and releases the response.
    /// </summary>
    static void Main()
    {
        RequestState myRequestState = new RequestState();
        RegisteredWaitHandle timeoutRegistration = null;

        try
        {
            // Create a HttpWebRequest object to the desired URL.
            HttpWebRequest myHttpWebRequest =
                (HttpWebRequest)WebRequest.Create("http://www.contoso.com");

            // If you are behind a firewall and you do not have your browser
            // proxy set up, you need to use the following proxy creation code:
            //
            //     // Create a proxy object.
            //     WebProxy myProxy = new WebProxy();
            //
            //     // Associate a new Uri object with the proxy object, using
            //     // the proxy address selected by the user.
            //     myProxy.Address = new Uri("http://myproxy");
            //
            //     // Finally, initialize the Web request object's Proxy
            //     // property with the proxy object.
            //     myHttpWebRequest.Proxy = myProxy;

            // Hand the request to the state object so the callbacks can reach it.
            myRequestState.request = myHttpWebRequest;

            // Start the asynchronous request.
            IAsyncResult result = myHttpWebRequest.BeginGetResponse(
                new AsyncCallback(RespCallback), myRequestState);

            // This implements the timeout: if the response has not arrived
            // within DefaultTimeout, TimeoutCallback fires and aborts the request.
            timeoutRegistration = ThreadPool.RegisterWaitForSingleObject(
                result.AsyncWaitHandle,
                new WaitOrTimerCallback(TimeoutCallback),
                myHttpWebRequest,
                DefaultTimeout,
                true);

            // Block until the callbacks report that processing has finished
            // (successfully or not).
            allDone.WaitOne();
        }
        catch (WebException e)
        {
            Console.WriteLine("\nMain Exception raised!");
            Console.WriteLine("\nMessage:{0}", e.Message);
            Console.WriteLine("\nStatus:{0}", e.Status);
            Console.WriteLine("Press any key to continue..........");
            // Actually wait, as the prompt says, so the message can be read.
            Console.Read();
        }
        catch (Exception e)
        {
            Console.WriteLine("\nMain Exception raised!");
            Console.WriteLine("Source :{0} ", e.Source);
            Console.WriteLine("Message :{0} ", e.Message);
            Console.WriteLine("Press any key to continue..........");
            Console.Read();
        }
        finally
        {
            // Cancel the timeout registration so it does not outlive the request.
            if (timeoutRegistration != null)
            {
                timeoutRegistration.Unregister(null);
            }

            // Release the HttpWebResponse resource. It is still null when the
            // request failed before a response was obtained.
            if (myRequestState.response != null)
            {
                myRequestState.response.Close();
            }
        }
    }

    /// <summary>
    /// Completes the asynchronous GetResponse call and starts the first
    /// asynchronous read of the response stream.
    /// </summary>
    /// <param name="asynchronousResult">Result whose AsyncState is the RequestState.</param>
    private static void RespCallback(IAsyncResult asynchronousResult)
    {
        // True once a read has been queued; from then on ReadCallBack is
        // responsible for signaling allDone.
        bool readStarted = false;

        try
        {
            // The state of the request travels in AsyncState.
            RequestState myRequestState = (RequestState)asynchronousResult.AsyncState;
            HttpWebRequest myHttpWebRequest = myRequestState.request;
            myRequestState.response =
                (HttpWebResponse)myHttpWebRequest.EndGetResponse(asynchronousResult);

            // Keep the response stream so ReadCallBack can continue reading it.
            Stream responseStream = myRequestState.response.GetResponseStream();
            myRequestState.streamResponse = responseStream;

            // Begin reading the contents of the HTML page; ReadCallBack
            // prints it to the console when the stream is exhausted.
            responseStream.BeginRead(
                myRequestState.BufferRead, 0, BUFFER_SIZE,
                new AsyncCallback(ReadCallBack), myRequestState);
            readStarted = true;
        }
        catch (WebException e)
        {
            Console.WriteLine("\nRespCallback Exception raised!");
            Console.WriteLine("\nMessage:{0}", e.Message);
            Console.WriteLine("\nStatus:{0}", e.Status);
        }
        finally
        {
            // No read is pending, so nothing else will release Main.
            if (!readStarted)
            {
                allDone.Set();
            }
        }
    }

    /// <summary>
    /// Completes one asynchronous read. Appends the bytes received and
    /// queues the next read, or, at end of stream, prints the accumulated
    /// page and signals completion.
    /// </summary>
    /// <param name="asyncResult">Result whose AsyncState is the RequestState.</param>
    private static void ReadCallBack(IAsyncResult asyncResult)
    {
        // True when another read has been queued and will signal allDone itself.
        bool readPending = false;

        try
        {
            RequestState myRequestState = (RequestState)asyncResult.AsyncState;
            Stream responseStream = myRequestState.streamResponse;
            int read = responseStream.EndRead(asyncResult);

            if (read > 0)
            {
                // NOTE: ASCII decoding turns every non-ASCII byte into '?'.
                myRequestState.requestData.Append(
                    Encoding.ASCII.GetString(myRequestState.BufferRead, 0, read));
                responseStream.BeginRead(
                    myRequestState.BufferRead, 0, BUFFER_SIZE,
                    new AsyncCallback(ReadCallBack), myRequestState);
                readPending = true;
                return;
            }

            // End of stream: release it before blocking on console input.
            responseStream.Close();

            Console.WriteLine("\nThe contents of the Html page are : ");
            if (myRequestState.requestData.Length > 0)
            {
                Console.WriteLine(myRequestState.requestData.ToString());
            }
            Console.WriteLine("Press any key to continue..........");
            Console.ReadLine();
        }
        catch (WebException e)
        {
            Console.WriteLine("\nReadCallBack Exception raised!");
            Console.WriteLine("\nMessage:{0}", e.Message);
            Console.WriteLine("\nStatus:{0}", e.Status);
        }
        catch (IOException e)
        {
            // A failed stream read surfaces as IOException; report it instead
            // of letting it escape from the callback thread.
            Console.WriteLine("\nReadCallBack Exception raised!");
            Console.WriteLine("\nMessage:{0}", e.Message);
        }
        finally
        {
            if (!readPending)
            {
                allDone.Set();
            }
        }
    }
}
Hi All,

I am trying to read the contents of a page through its URL.

My code snippet is as follows:
/// <summary>
/// Synchronously downloads http://www.microsoft.com and writes the body to
/// the current <c>Response</c>, one line at a time.
/// </summary>
/// <remarks>
/// <c>StreamReader.ReadLine</c> returns each line without its terminator, so
/// the lines are written back to back with no separators between them.
/// </remarks>
public void mtdGetPageDataHWR()
{
    HttpWebRequest objRequ =
        (HttpWebRequest)WebRequest.Create("http://www.microsoft.com");

    // Dispose the response and the reader deterministically; otherwise the
    // underlying connection stays open until the garbage collector runs.
    using (HttpWebResponse objResp = (HttpWebResponse)objRequ.GetResponse())
    using (StreamReader objRd = new StreamReader(objResp.GetResponseStream()))
    {
        string strRd;
        while ((strRd = objRd.ReadLine()) != null)
        {
            Response.Write(strRd);
        }
    }
}

Is there any other way to achieve this which could be more efficient or
faster than this.

Any help on this would be very handy

Thanks,

Kuldeep
 
Hi,

A very simple way to download data over the web is to use a WebClient:

// Download the page as raw bytes and decode them as UTF-8.
string html;
// WebClient is IDisposable; the using block releases it as soon as the
// download has finished.
using (System.Net.WebClient client = new System.Net.WebClient())
{
    byte[] data = client.DownloadData("http://www.microsoft.com");
    html = System.Text.Encoding.UTF8.GetString(data);
}

However, with a WebClient you have little control over the transfer.
 
Back
Top