Wolvorine
February 21st, 2007, 06:01 AM
I just want to get the inner text from an HTML file. To do that, I read an HTML file from my disk and then, to filter the text out of it, I wrote the code below... but it doesn't work as I expected. Can anyone help me solve this problem?
strHtmlData = " ......................";// suppose this is html text with tags etc which needs to be filter
IWebBrowser2* pWebBrowser = NULL;
CoInitialize(NULL);
HRESULT hr ;
hr = CoCreateInstance(CLSID_InternetExplorer, NULL, CLSCTX_ALL ,REGCLS_SINGLEUSE , (LPVOID*)&pWebBrowser);
if(hr==S_OK)
{
IDispatch* pHtmlDocDispatch = NULL;
IHTMLDocument2 * pHtmlDoc = NULL;
hr = pWebBrowser->get_Document (&pHtmlDocDispatch);
if (SUCCEEDED (hr) && (pHtmlDocDispatch != NULL))
{
hr = pHtmlDocDispatch->QueryInterface (IID_IHTMLDocument2,
(void**)&pHtmlDoc);
if (SUCCEEDED (hr) && (pHtmlDoc != NULL))
{
CComPtr<IHTMLElement> pHTMLElement;
HRESULT hr=pHtmlDoc->get_body(&pHTMLElement);
BSTR bstrHtml,bstrText;
bstrHtml = (BSTR) strHtmlData .c_str();
pHTMLElement->put_innerHTML(bstrHtml); //this is my html text
pHTMLElement->get_innerText(bstrText); //here is what i was looking for
SysFreeString(bstrHtml);
SysFreeString(bstrText);
pHTMLElement=NULL;
pHtmlDoc->Release();
}
pHtmlDocDispatch->Release();
}
pWebBrowser->Release ();
}
strHtmlData = " ......................";// suppose this is html text with tags etc which needs to be filter
IWebBrowser2* pWebBrowser = NULL;
CoInitialize(NULL);
HRESULT hr ;
hr = CoCreateInstance(CLSID_InternetExplorer, NULL, CLSCTX_ALL ,REGCLS_SINGLEUSE , (LPVOID*)&pWebBrowser);
if(hr==S_OK)
{
IDispatch* pHtmlDocDispatch = NULL;
IHTMLDocument2 * pHtmlDoc = NULL;
hr = pWebBrowser->get_Document (&pHtmlDocDispatch);
if (SUCCEEDED (hr) && (pHtmlDocDispatch != NULL))
{
hr = pHtmlDocDispatch->QueryInterface (IID_IHTMLDocument2,
(void**)&pHtmlDoc);
if (SUCCEEDED (hr) && (pHtmlDoc != NULL))
{
CComPtr<IHTMLElement> pHTMLElement;
HRESULT hr=pHtmlDoc->get_body(&pHTMLElement);
BSTR bstrHtml,bstrText;
bstrHtml = (BSTR) strHtmlData .c_str();
pHTMLElement->put_innerHTML(bstrHtml); //this is my html text
pHTMLElement->get_innerText(bstrText); //here is what i was looking for
SysFreeString(bstrHtml);
SysFreeString(bstrText);
pHTMLElement=NULL;
pHtmlDoc->Release();
}
pHtmlDocDispatch->Release();
}
pWebBrowser->Release ();
}