Click to See Complete Forum and Search --> : European character encoding in winsock GET.


andlag
October 4th, 2003, 05:14 PM
Hi,
I really need help on this.

This is the url:
http://www.altavista.com/web/results?q=%22l%C3%A4gg+till%22&kgs=0&kls=1&avkw=aapt

In a browser, it displays an AltaVista results page with some hits for Swedish pages. When I request the same URL from my program, I get an AltaVista page saying there are zero hits, but Internet Explorer displays the results just fine.

Please let me know if I can give you more detail of the problem.

Yours sincerely
Andla

I am posting the source code here for you:
// lnk is the URL I want to retrieve.

// Fetches the URL held in lnk with a hand-written HTTP/1.0 GET on port 80 and
// leaves the raw reply (status line, headers and body) in `response`.
// Returns early, without any network activity, when lnk has no "://" separator.

// --- Split the URL into host and request target -------------------------
int start = lnk.IndexOf("://");
if (start == -1)
{
    return;
}
start += "://".Length;

int stop = lnk.IndexOf('/', start);
if (stop == -1)
{
    stop = lnk.Length;
}
string domain = lnk.Substring(start, stop - start);

// Keep the path and query exactly as given. Trimming trailing slashes would
// change the requested resource ("/dir/" is not "/dir", and a query value may
// legitimately end in '/'). Only a missing path is normalised, to "/".
string path = lnk.Substring(stop);
if (path == "")
{
    path = "/";
}

// --- Build the request ---------------------------------------------------
// Every header line must have the form "Name: value". The charset preference
// is therefore sent as a standard Accept-Charset header.
string requestText =
    "GET " + path + " HTTP/1.0\r\n" +
    "Host: " + domain + "\r\n" +
    "Accept-Charset: utf-8\r\n" +
    "\r\n";

// UTF-8 without a byte-order mark; for an already percent-encoded URL the
// resulting bytes are plain ASCII.
UTF8Encoding encoder2 = new UTF8Encoding(false);
Byte[] request = encoder2.GetBytes(requestText);

// --- Send the request and collect the reply ------------------------------
// ISO-8859-1 maps every byte to exactly one character, so decoding each
// buffer separately can never split a character across two reads.
// NOTE(review): if the server answers in UTF-8, non-ASCII text in `response`
// will need re-decoding according to the Content-Type header.
Encoding encoder = Encoding.GetEncoding("iso-8859-1");
StringBuilder received = new StringBuilder();

TcpClient cli = new TcpClient();
try
{
    cli.Connect(domain, 80);

    NetworkStream stream = cli.GetStream();
    try
    {
        stream.Write(request, 0, request.Length);

        // Read blocks until data arrives and returns 0 once the server closes
        // the connection, so no polling on DataAvailable is needed.
        Byte[] buffer = new byte[1024];
        int buffcount;
        while ((buffcount = stream.Read(buffer, 0, buffer.Length)) > 0)
        {
            received.Append(encoder.GetString(buffer, 0, buffcount));
        }
    }
    finally
    {
        stream.Close();
    }
}
finally
{
    // Runs even when Connect, Write or Read throws, so the socket is released.
    cli.Close();
}

String response = received.ToString();