3
将文件从MSword转换为PDF抽取后,出现错误未找到PDF标头签名。错误找不到PDF标头签名
public void Extract_inputpdf()
{
text_input_File = string.Empty;
StringBuilder sb_inputpdf = new StringBuilder();
PdfReader reader_inputPdf = new PdfReader(path); //read PDF
for (int i = 0; i <=reader_inputPdf.NumberOfPages ; i++)
{
TextWithFont_inputPdf inputpdf = new TextWithFont_inputPdf();
text_input_File = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader_inputPdf, i, inputpdf);
sb_inputpdf.Append(text_input_File);
input_pdf = sb_inputpdf.ToString();
}
reader_inputPdf.Close();
clear();
}
谁能告诉我怎么解决this.Thanku
// word to pdf
if (Extentsion_path == ".doc" || Extentsion_path == ".docx")
{
uploadFInput.SaveAs(targetPathip);
string wordFileName = targetPathip;
_Word.Visible = false;
_Word.ScreenUpdating = false;
// Cast as Object for word Open method
filename = (object)wordFileName;
// Use the dummy value as a placeholder for optional arguments
Microsoft.Office.Interop.Word.Document doc = _Word.Documents.Open(ref filename, ref _MissingValue,
ref _MissingValue, ref _MissingValue, ref _MissingValue, ref _MissingValue, ref _MissingValue,
ref _MissingValue, ref _MissingValue, ref _MissingValue, ref _MissingValue, ref _MissingValue,
ref _MissingValue, ref _MissingValue, ref _MissingValue, ref _MissingValue);
doc.Activate();
object outputFileName = pdfFileName = Path.ChangeExtension(wordFileName, "pdf");
object fileFormat = WdSaveFormat.wdFormatPDF;
// Save document into PDF Format
doc.SaveAs(ref outputFileName, ref fileFormat, ref _MissingValue, ref _MissingValue,
ref _MissingValue, ref _MissingValue, ref _MissingValue, ref _MissingValue,
ref _MissingValue, ref _MissingValue, ref _MissingValue, ref _MissingValue,
ref _MissingValue, ref _MissingValue, ref _MissingValue, ref _MissingValue);
// Close the Word document, but leave the Word application open.
// doc has to be cast to type _Document so that it will find the
// correct Close method.
object saveChanges = WdSaveOptions.wdDoNotSaveChanges;
((_Document)doc).Close(ref saveChanges, ref _MissingValue, ref _MissingValue);
doc = null;
// word has to be cast to type _Application so that it will find
// the correct Quit method.
((_Application)_Word).Quit(ref _MissingValue, ref _MissingValue, ref _MissingValue);
_Word = null;
//uploadFInput.SaveAs(pdfFileName);
// = targetPathip;
uploadFInput.SaveAs(pdfFileName);
LblFleip.Text = pdfFileName;
}
else
{
uploadFInput.SaveAs(targetPathip);
LblFleip.Text = targetPathip;
}
那么您是从Microsoft Word保存/导出为PDF还是现在您正在尝试阅读PDF?哪条线给你错误?我可以发现的一件事是,iText中的页码以'1'开始,而不是'0',因此请尝试将'for'循环从'1'开始。 –
我给PDf加了词。读取时出错PdfReader reader_inputPdf = new PdfReader(path); //阅读PDF – pdp
用于测试目的我正在测试相同的PDF很多次我得到类型不匹配的错误。 (来自HRESULT的异常:0x80020005(DISP_E_TYPEMISMATCH)) – pdp