① C#如何讀取pdf文本
NuGet包管理器 ->程序包管理器控制台 ->輸入Install-Package Spire.PDF然後按回車安裝dll
然後使用下面代碼讀取PDF文本
using System;
using System.IO;
using System.Text;
using Spire.Pdf;
namespace ExtractText_PDF
{
    /// <summary>
    /// Console sample that extracts the text of every page of a PDF with Spire.PDF
    /// and writes the combined text to a .txt file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "sample.pdf", concatenates the text extracted from each page and
        /// writes the result to "獲取文本.txt". Both paths are relative, so they are
        /// resolved against the current working directory.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Create the PdfDocument instance that will hold the loaded PDF.
            PdfDocument doc = new PdfDocument();
            try
            {
                doc.LoadFromFile("sample.pdf");

                // Accumulate the text of all pages in a single buffer.
                StringBuilder content = new StringBuilder();
                foreach (PdfPageBase page in doc.Pages)
                {
                    content.Append(page.ExtractText());
                }

                // Save the extracted text as a .txt file.
                String fileName = "獲取文本.txt";
                File.WriteAllText(fileName, content.ToString());
            }
            finally
            {
                // Release the document's resources even when loading or extraction fails.
                doc.Close();
            }
        }
    }
}
② java創建pdf文件寫入不進去
通常需要用到用於讀、寫、編輯PDF文件的庫,你可以參考下面採用spire.pdf.jar來創建PDF的步驟及方法:
首先需要引入jar包。具體的引入方法可以自行網路搜索。
創建PdfDocument類的對象,並通過PdfDocument.getPages().add()方法添加頁面。
定義標題文字。
創建PdfSolidBrush畫刷、PdfTrueTypeFont字體、PdfStringFormat字元串、Rectangle2D等對象,用於指定字元串繪制效果、字體、格式、繪制區域等。
通過PdfPageBase.getCanvas().drawString(body, font2, brush2, rect, format2)方法將內容繪制到PDF頁面。
下面附上詳細的代碼demo示例:
import com.spire.pdf.*;
import com.spire.pdf.graphics.*;
import java.awt.*;
import java.awt.geom.*;
import java.io.*;
public class CreatePdfDocumentInJava {
public static void main(String[] args) throws FileNotFoundException, IOException {
//創建PdfDocument對象
PdfDocument doc = new PdfDocument();
//添加一頁
PdfPageBase page = doc.getPages().add();
//標題文字
String title = "Java基礎語法";
//創建單色畫刷對象
PdfSolidBrush brush1 = new PdfSolidBrush(new PdfRGBColor(Color.BLUE));
PdfSolidBrush brush2 = new PdfSolidBrush(new PdfRGBColor(Color.BLACK));
//創建TrueType字體對象
PdfTrueTypeFont font1 = new PdfTrueTypeFont(new Font("宋體", Font.PLAIN, 14), true);
PdfTrueTypeFont font2 = new PdfTrueTypeFont(new Font("宋體", Font.PLAIN, 10), true);
//創建PdfStringFormat對象
PdfStringFormat format1 = new PdfStringFormat();
format1.setAlignment(PdfTextAlignment.Center);//設置文字居中
//使用drawString方法繪制標題文字
page.getCanvas().drawString(title, font1, brush1, new Point2D.Float((float) page.getActualBounds(true).getWidth() / 2, 0), format1);
//從txt文件讀取內容到字元串
String body = readFileToString("C:\Users\Administrator\Desktop\bodyText.txt");
//創建PdfStringFormat對象
PdfStringFormat format2 = new PdfStringFormat();
format2.setParagraphIndent(20);//設置段首縮進
//創建Rectangle2D對象
Rectangle2D.Float rect = new Rectangle2D.Float(0, 30, (float) page.getActualBounds(true).getWidth(), (float) page.getActualBounds(true).getHeight());
//使用drawString方法在矩形區域繪制主體文字
page.getCanvas().drawString(body, font2, brush2, rect, format2);
//保存到PDF文檔
doc.saveToFile("ouput.pdf");
}
//自定義方法讀取txt文件內容到字元串
private static String readFileToString(String filepath) throws FileNotFoundException, IOException {
StringBuilder sb = new StringBuilder();
String s = "";
BufferedReader br = new BufferedReader(new FileReader(filepath));
while ((s = br.readLine()) != null) {
sb.append(s + "
");
}
br.close();
String str = sb.toString();
return str;
}
}
③ java中如何實現向已有的PDF文件插入附件
可以用Spire.Pdf for Java類庫給PDF文檔添加附件,下面的代碼是插入Excel和Word附件給你參考:
import com.spire.pdf.annotations.*;
import com.spire.pdf.attachments.PdfAttachment;
import com.spire.pdf.graphics.*;
import java.awt.*;
import java.awt.geom.Dimension2D;
import java.awt.geom.Rectangle2D;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
public class AttachFiles {
public static void main(String[] args) throws IOException {
//創建PdfDocument對象
PdfDocument doc = new PdfDocument();
//載入PDF文檔
doc.loadFromFile("C:\Users\Administrator\Desktop\sample.pdf");
//添加附件到PDF
PdfAttachment attachment = new PdfAttachment("C:\Users\Administrator\Desktop\使用說明書.docx");
doc.getAttachments().add(attachment);
//繪制標簽
String label = "財務報表.xlsx";
PdfTrueTypeFont font = new PdfTrueTypeFont(new Font("Arial Unicode MS",Font.PLAIN,12),true);
double x = 35;
double y = doc.getPages().get(0).getActualSize().getHeight() - 200;
doc.getPages().get(0).getCanvas().drawString(label, font, PdfBrushes.getOrange(), x, y);
//添加註釋附件到PDF
String filePath = "C:\Users\Administrator\Desktop\財務報表.xlsx";
byte[] data = toByteArray(filePath);
Dimension2D size = font.measureString(label);
Rectangle2D bound = new Rectangle2D.Float((float) (x + size.getWidth() + 2), (float) y, 10, 15);
PdfAttachmentAnnotation annotation = new PdfAttachmentAnnotation(bound, filePath, data);
annotation.setColor(new PdfRGBColor(new Color(0, 128, 128)));
annotation.setFlags(PdfAnnotationFlags.Default);
annotation.setIcon(PdfAttachmentIcon.Graph);
annotation.setText("點擊打開財務報表.xlsx");
doc.getPages().get(0).getAnnotationsWidget().add(annotation);
//保存文檔
doc.saveToFile("Attachments.pdf");
}
//讀取文件到byte數組
public static byte[] toByteArray(String filePath) throws IOException {
File file = new File(filePath);
long fileSize = file.length();
if (fileSize > Integer.MAX_VALUE) {
System.out.println("file too big...");
return null;
}
FileInputStream fi = new FileInputStream(file);
byte[] buffer = new byte[(int) fileSize];
int offset = 0;
int numRead = 0;
while (offset < buffer.length && (numRead = fi.read(buffer, offset, buffer.length - offset)) >= 0) {
offset += numRead;
}
if (offset != buffer.length) {
throw new IOException("Could not completely read file "
+ file.getName());
}
fi.close();
return buffer;
}
}
效果:
④ 用c#如何提取pdf里的表格
添加spire.pdf.dll為引用,使用下面的代碼即可提取pdf中所有表格數據到txt文檔:
using System.IO;
using System.Text;
using Spire.Pdf;
using Spire.Pdf.Utilities;
namespace ExtractPdfTable
{
    /// <summary>
    /// Console sample that extracts every table found in a PDF with Spire.PDF's
    /// PdfTableExtractor and writes the cell text to "Table.txt".
    /// </summary>
    class Program
    {
        /// <summary>
        /// Extracts all tables page by page. Each table is written as a
        /// "Table N" heading followed by one line per row, with cells separated
        /// by a space and a blank line after the table.
        /// </summary>
        /// <param name="args">
        /// Optional: the first argument, when present, is used as the input PDF path.
        /// </param>
        static void Main(string[] args)
        {
            // NOTE(review): the built-in default path looks truncated (a drive-relative
            // file named ".pdf"); pass the real PDF path as the first argument.
            string inputPath = args != null && args.Length > 0 ? args[0] : @"C:.pdf";

            // Create a PdfDocument object and load the PDF file.
            PdfDocument doc = new PdfDocument();
            try
            {
                doc.LoadFromFile(inputPath);

                StringBuilder builder = new StringBuilder();
                PdfTableExtractor extractor = new PdfTableExtractor(doc);
                int tableNum = 1;

                // Loop through the pages.
                for (int pageIndex = 0; pageIndex < doc.Pages.Count; pageIndex++)
                {
                    // Extract the tables of this page; skip pages without any.
                    PdfTable[] tableList = extractor.ExtractTable(pageIndex);
                    if (tableList == null || tableList.Length == 0)
                    {
                        continue;
                    }

                    foreach (PdfTable table in tableList)
                    {
                        // Heading on its own line so tables can be told apart.
                        builder.Append("Table " + tableNum);
                        builder.AppendLine();

                        int row = table.GetRowCount();
                        int column = table.GetColumnCount();
                        for (int i = 0; i < row; i++)
                        {
                            for (int j = 0; j < column; j++)
                            {
                                // Cell text followed by a space as the column separator.
                                string text = table.GetText(i, j);
                                builder.Append(text + " ");
                            }
                            // End the row with a line break so rows do not run together.
                            builder.AppendLine();
                        }

                        // Blank line between tables.
                        builder.AppendLine();
                        tableNum += 1;
                    }
                }

                // Write to a .txt file.
                File.WriteAllText("Table.txt", builder.ToString());
            }
            finally
            {
                // Release the document's resources even when extraction fails.
                doc.Close();
            }
        }
    }
}
⑤ java導出的pdf文件是空白是因為什麼
導出的PDF大小是否是0kb?如果是0,那就是沒有把內容寫入到pdf,看下如下導出方法:
importjava.awt.*;
importjava.awt.geom.Point2D;
importjava.awt.geom.Rectangle2D;
importjava.io.*;
importcom.spire.pdf.PdfPageBase;
importcom.spire.pdf.graphics.*;
{
publicstaticvoidmain(String[]args)throwsFileNotFoundException,IOException{
//創建PdfDocument對象
PdfDocumentdoc=newPdfDocument();
//添加一頁
PdfPageBasepage=doc.getPages().add();
//標題文字
Stringtitle="Java基礎語法";
//創建單色畫刷對象
PdfSolidBrushbrush1=newPdfSolidBrush(newPdfRGBColor(Color.BLUE));
PdfSolidBrushbrush2=newPdfSolidBrush(newPdfRGBColor(Color.BLACK));
//創建TrueType字體對象
PdfTrueTypeFontfont1=newPdfTrueTypeFont(newFont("ArialUnicodeMS",Font.PLAIN,14),true);
PdfTrueTypeFontfont2=newPdfTrueTypeFont(newFont("ArialUnicodeMS",Font.PLAIN,10),true);
//創建PdfStringFormat對象
PdfStringFormatformat1=newPdfStringFormat();
format1.setAlignment(PdfTextAlignment.Center);//設置文字居中
//使用drawString方法繪制標題文字
page.getCanvas().drawString(title,font1,brush1,newPoint2D.Float(page.getActualBounds(true).width/2,0),format1);
//從txt文件讀取內容到字元串
Stringbody=readFileToString("C:\Users\Administrator\Desktop\bodyText.txt");
//創建PdfStringFormat對象
PdfStringFormatformat2=newPdfStringFormat();
format2.setParagraphIndent(20);//設置段首縮進
//創建Rectangle2D對象
Rectangle2D.Floatrect=newRectangle2D.Float(0,30,page.getActualBounds(true).width,page.getActualBounds(true).height);
//使用drawString方法在矩形區域繪制主體文字
page.getCanvas().drawString(body,font2,brush2,rect,format2);
//保存到PDF文檔
doc.saveToFile("ouput.pdf");
}
//自定義方法讀取txt文件內容到字元串
(Stringfilepath)throwsFileNotFoundException,IOException{
StringBuildersb=newStringBuilder();
Strings="";
BufferedReaderbr=newBufferedReader(newFileReader(filepath));
while((s=br.readLine())!=null){
sb.append(s+" ");
}
br.close();
Stringstr=sb.toString();
returnstr;
}
}
PDF導出效果如下圖:
⑥ c#生成的pdf的大小為0kb
0KB那就是生成的PDF文檔沒內容,看下你的代碼是哪裡有問題。建議可以通過PDF庫來實現生成文檔。可參考下面的代碼方法:
using System.Drawing;
using Spire.Pdf;
using Spire.Pdf.Graphics;
namespace CreatePDF_PDF
{
    /// <summary>
    /// Console sample that creates a one-page A4 PDF with Spire.PDF containing a
    /// line of text and an image, saves it and opens the saved file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the document, saves it as "PDF創建.pdf" (relative to the working
        /// directory) and then launches that file.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Create the PdfDocument instance.
            PdfDocument document = new PdfDocument();
            try
            {
                // Convert the page margins from centimeters to points:
                // 2.54 cm top/bottom, 3.17 cm left/right.
                PdfUnitConvertor unitCvtr = new PdfUnitConvertor();
                PdfMargins margins = new PdfMargins();
                margins.Top = unitCvtr.ConvertUnits(2.54f, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point);
                margins.Bottom = margins.Top;
                margins.Left = unitCvtr.ConvertUnits(3.17f, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point);
                margins.Right = margins.Left;

                // Add a new A4 page with those margins.
                PdfPageBase page = document.Pages.Add(PdfPageSize.A4, margins);

                // Font and brush used for the text.
                PdfTrueTypeFont font = new PdfTrueTypeFont(new Font("楷體", 15f), true);
                PdfBrush brush = PdfBrushes.Red;

                // Draw the text at (15, 13) on the page canvas.
                string text = ("《蝶戀花 送春》 樓外垂楊千萬縷,欲系青春,少住春還去。猶自風前飄柳絮,隨春且看歸何處? 綠滿山川聞杜宇,便作無情,莫也愁人苦。把酒送春春不語,黃昏卻下瀟瀟雨。");
                page.Canvas.DrawString(text, font, brush, 15, 13);

                // Load the image and draw it at (15, 100) using its own width and height.
                PdfImage image = PdfImage.FromFile("img.jpg");
                float width = image.Width;
                float height = image.Height;
                page.Canvas.DrawImage(image, 15, 100, width, height);

                // Save the document.
                document.SaveToFile("PDF創建.pdf");
            }
            finally
            {
                // Release the document's resources before the saved file is opened.
                document.Close();
            }

            // Open the saved document with the associated application.
            System.Diagnostics.Process.Start("PDF創建.pdf");
        }
    }
}
執行程序後,在項目的 bin\Debug 輸出目錄(例如 C:\…\Visual Studio 2013\Projects\CreatePDF_PDF\CreatePDF_PDF\bin\Debug)下可查看生成的文檔。可以看到生成的文檔大小,如圖: