① C#如何读取pdf文本
在Visual Studio中依次打开“NuGet包管理器”->“程序包管理器控制台”,输入 Install-Package Spire.PDF 然后按回车,即可安装所需的dll。
然后使用下面代码读取PDF文本
using System;
using System.IO;
using System.Text;
using Spire.Pdf;
namespace ExtractText_PDF
{
    /// <summary>
    /// Console sample that extracts the text of every page of a PDF document
    /// and saves it to a plain-text file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "sample.pdf", appends the text extracted from each page to a
        /// single buffer, and writes that buffer to "获取文本.txt".
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Accumulates the text of all pages in page order.
            StringBuilder content = new StringBuilder();

            // Dispose the document once extraction is finished so that whatever
            // resources it holds are released even if loading or extraction throws.
            using (PdfDocument doc = new PdfDocument())
            {
                doc.LoadFromFile("sample.pdf");

                // Walk every page and collect its text.
                foreach (PdfPageBase page in doc.Pages)
                {
                    content.Append(page.ExtractText());
                }
            }

            // Persist the extracted text as a .txt file (relative path).
            string fileName = "获取文本.txt";
            File.WriteAllText(fileName, content.ToString());
        }
    }
}
② java创建pdf文件写入不进去
通常需要用到用于读、写、编辑PDF文件的库,你可以参考下面采用spire.pdf.jar来创建PDF的步骤及方法:
首先需要引入jar包。具体的引入方法可以自行网络搜索。
创建PdfDocument类的对象,并通过PdfDocument.getPages().add()方法添加页面。
定义标题文字。
创建PdfSolidBrush画刷、PdfTrueTypeFont字体、PdfStringFormat字符串格式、Rectangle2D矩形等对象,用于指定字符串的绘制效果、字体、格式、绘制区域等。
通过PdfPageBase.getCanvas().drawString(body, font2, brush2, rect, format2)方法将内容绘制到PDF页面。
下面附上详细的代码demo示例:
import com.spire.pdf.*;
import com.spire.pdf.graphics.*;
import java.awt.*;
import java.awt.geom.*;
import java.io.*;
public class CreatePdfDocumentInJava {
public static void main(String[] args) throws FileNotFoundException, IOException {
//创建PdfDocument对象
PdfDocument doc = new PdfDocument();
//添加一页
PdfPageBase page = doc.getPages().add();
//标题文字
String title = "Java基础语法";
//创建单色画刷对象
PdfSolidBrush brush1 = new PdfSolidBrush(new PdfRGBColor(Color.BLUE));
PdfSolidBrush brush2 = new PdfSolidBrush(new PdfRGBColor(Color.BLACK));
//创建TrueType字体对象
PdfTrueTypeFont font1 = new PdfTrueTypeFont(new Font("宋体", Font.PLAIN, 14), true);
PdfTrueTypeFont font2 = new PdfTrueTypeFont(new Font("宋体", Font.PLAIN, 10), true);
//创建PdfStringFormat对象
PdfStringFormat format1 = new PdfStringFormat();
format1.setAlignment(PdfTextAlignment.Center);//设置文字居中
//使用drawString方法绘制标题文字
page.getCanvas().drawString(title, font1, brush1, new Point2D.Float((float) page.getActualBounds(true).getWidth() / 2, 0), format1);
//从txt文件读取内容到字符串
String body = readFileToString("C:\Users\Administrator\Desktop\bodyText.txt");
//创建PdfStringFormat对象
PdfStringFormat format2 = new PdfStringFormat();
format2.setParagraphIndent(20);//设置段首缩进
//创建Rectangle2D对象
Rectangle2D.Float rect = new Rectangle2D.Float(0, 30, (float) page.getActualBounds(true).getWidth(), (float) page.getActualBounds(true).getHeight());
//使用drawString方法在矩形区域绘制主体文字
page.getCanvas().drawString(body, font2, brush2, rect, format2);
//保存到PDF文档
doc.saveToFile("ouput.pdf");
}
//自定义方法读取txt文件内容到字符串
private static String readFileToString(String filepath) throws FileNotFoundException, IOException {
StringBuilder sb = new StringBuilder();
String s = "";
BufferedReader br = new BufferedReader(new FileReader(filepath));
while ((s = br.readLine()) != null) {
sb.append(s + "
");
}
br.close();
String str = sb.toString();
return str;
}
}
③ java中如何实现向已有的PDF文件插入附件
可以用Spire.Pdf for Java类库给PDF文档添加附件,下面的代码是插入Excel和Word附件给你参考:
import com.spire.pdf.PdfDocument;
import com.spire.pdf.annotations.*;
import com.spire.pdf.attachments.PdfAttachment;
import com.spire.pdf.graphics.*;
import java.awt.*;
import java.awt.geom.Dimension2D;
import java.awt.geom.Rectangle2D;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
public class AttachFiles {
public static void main(String[] args) throws IOException {
//创建PdfDocument对象
PdfDocument doc = new PdfDocument();
//加载PDF文档
doc.loadFromFile("C:\Users\Administrator\Desktop\sample.pdf");
//添加附件到PDF
PdfAttachment attachment = new PdfAttachment("C:\Users\Administrator\Desktop\使用说明书.docx");
doc.getAttachments().add(attachment);
//绘制标签
String label = "财务报表.xlsx";
PdfTrueTypeFont font = new PdfTrueTypeFont(new Font("Arial Unicode MS",Font.PLAIN,12),true);
double x = 35;
double y = doc.getPages().get(0).getActualSize().getHeight() - 200;
doc.getPages().get(0).getCanvas().drawString(label, font, PdfBrushes.getOrange(), x, y);
//添加注释附件到PDF
String filePath = "C:\Users\Administrator\Desktop\财务报表.xlsx";
byte[] data = toByteArray(filePath);
Dimension2D size = font.measureString(label);
Rectangle2D bound = new Rectangle2D.Float((float) (x + size.getWidth() + 2), (float) y, 10, 15);
PdfAttachmentAnnotation annotation = new PdfAttachmentAnnotation(bound, filePath, data);
annotation.setColor(new PdfRGBColor(new Color(0, 128, 128)));
annotation.setFlags(PdfAnnotationFlags.Default);
annotation.setIcon(PdfAttachmentIcon.Graph);
annotation.setText("点击打开财务报表.xlsx");
doc.getPages().get(0).getAnnotationsWidget().add(annotation);
//保存文档
doc.saveToFile("Attachments.pdf");
}
//读取文件到byte数组
public static byte[] toByteArray(String filePath) throws IOException {
File file = new File(filePath);
long fileSize = file.length();
if (fileSize > Integer.MAX_VALUE) {
System.out.println("file too big...");
return null;
}
FileInputStream fi = new FileInputStream(file);
byte[] buffer = new byte[(int) fileSize];
int offset = 0;
int numRead = 0;
while (offset < buffer.length && (numRead = fi.read(buffer, offset, buffer.length - offset)) >= 0) {
offset += numRead;
}
if (offset != buffer.length) {
throw new IOException("Could not completely read file "
+ file.getName());
}
fi.close();
return buffer;
}
}
效果:
④ 用c#如何提取pdf里的表格
添加spire.pdf.dll为引用,使用下面的代码即可提取pdf中所有表格数据到txt文档:
using System.IO;
using System.Text;
using Spire.Pdf;
using Spire.Pdf.Utilities;
namespace ExtractPdfTable
{
    /// <summary>
    /// Console sample that extracts every table found in a PDF document and
    /// writes the cell text to "Table.txt".
    /// </summary>
    class Program
    {
        /// <summary>
        /// Iterates over all pages, extracts the tables on each page and writes
        /// them out as text: a "Table N" heading line, then one line per row with
        /// cells separated by a space, then a blank line after each table.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            StringBuilder builder = new StringBuilder();

            // Dispose the document when extraction is done, even on failure.
            using (PdfDocument doc = new PdfDocument())
            {
                // NOTE(review): this path looks truncated (directory and file name
                // appear to be missing) - replace it with the real PDF path.
                doc.LoadFromFile(@"C:.pdf");

                PdfTableExtractor extractor = new PdfTableExtractor(doc);

                // Running table number across the whole document (1-based).
                int tableNum = 1;

                for (int pageIndex = 0; pageIndex < doc.Pages.Count; pageIndex++)
                {
                    // Extract the tables of the current page; skip pages that yield none.
                    PdfTable[] tableList = extractor.ExtractTable(pageIndex);
                    if (tableList == null || tableList.Length == 0)
                    {
                        continue;
                    }

                    foreach (PdfTable table in tableList)
                    {
                        // Heading on its own line so tables can be told apart in the output.
                        builder.Append("Table ").Append(tableNum).AppendLine();

                        int row = table.GetRowCount();
                        int column = table.GetColumnCount();

                        for (int i = 0; i < row; i++)
                        {
                            for (int j = 0; j < column; j++)
                            {
                                // Cell text followed by a single-space separator.
                                builder.Append(table.GetText(i, j)).Append(' ');
                            }

                            // End of row: a line break, otherwise rows run together
                            // and cannot be distinguished from cell separators.
                            builder.AppendLine();
                        }

                        // Blank line between tables.
                        builder.AppendLine();
                        tableNum++;
                    }
                }
            }

            // Write the collected text to a .txt file.
            File.WriteAllText("Table.txt", builder.ToString());
        }
    }
}
⑤ java导出的pdf文件是空白是因为什么
导出的PDF大小是否是0kb?如果是0,那就是没有把内容写入到pdf,看下如下导出方法:
importjava.awt.*;
importjava.awt.geom.Point2D;
importjava.awt.geom.Rectangle2D;
importjava.io.*;
importcom.spire.pdf.PdfPageBase;
importcom.spire.pdf.graphics.*;
{
publicstaticvoidmain(String[]args)throwsFileNotFoundException,IOException{
//创建PdfDocument对象
PdfDocumentdoc=newPdfDocument();
//添加一页
PdfPageBasepage=doc.getPages().add();
//标题文字
Stringtitle="Java基础语法";
//创建单色画刷对象
PdfSolidBrushbrush1=newPdfSolidBrush(newPdfRGBColor(Color.BLUE));
PdfSolidBrushbrush2=newPdfSolidBrush(newPdfRGBColor(Color.BLACK));
//创建TrueType字体对象
PdfTrueTypeFontfont1=newPdfTrueTypeFont(newFont("ArialUnicodeMS",Font.PLAIN,14),true);
PdfTrueTypeFontfont2=newPdfTrueTypeFont(newFont("ArialUnicodeMS",Font.PLAIN,10),true);
//创建PdfStringFormat对象
PdfStringFormatformat1=newPdfStringFormat();
format1.setAlignment(PdfTextAlignment.Center);//设置文字居中
//使用drawString方法绘制标题文字
page.getCanvas().drawString(title,font1,brush1,newPoint2D.Float(page.getActualBounds(true).width/2,0),format1);
//从txt文件读取内容到字符串
Stringbody=readFileToString("C:\Users\Administrator\Desktop\bodyText.txt");
//创建PdfStringFormat对象
PdfStringFormatformat2=newPdfStringFormat();
format2.setParagraphIndent(20);//设置段首缩进
//创建Rectangle2D对象
Rectangle2D.Floatrect=newRectangle2D.Float(0,30,page.getActualBounds(true).width,page.getActualBounds(true).height);
//使用drawString方法在矩形区域绘制主体文字
page.getCanvas().drawString(body,font2,brush2,rect,format2);
//保存到PDF文档
doc.saveToFile("ouput.pdf");
}
//自定义方法读取txt文件内容到字符串
(Stringfilepath)throwsFileNotFoundException,IOException{
StringBuildersb=newStringBuilder();
Strings="";
BufferedReaderbr=newBufferedReader(newFileReader(filepath));
while((s=br.readLine())!=null){
sb.append(s+" ");
}
br.close();
Stringstr=sb.toString();
returnstr;
}
}
PDF导出效果如下图:
⑥ c#生成的pdf的大小为0kb
0KB那就是生成的PDF文档没内容,看下你的代码是哪里有问题。建议可以通过PDF库来实现生成文档。可参考下面的代码方法:
using System.Drawing;
using Spire.Pdf;
using Spire.Pdf.Graphics;
namespace CreatePDF_PDF
{
    /// <summary>
    /// Console sample that creates a one-page A4 PDF containing a line of text
    /// and an image, saves it, and opens it with the default PDF viewer.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the document, saves it as "PDF创建.pdf" and then opens that file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Single definition of the output file so save and open cannot drift apart.
            const string OutputFileName = "PDF创建.pdf";

            // Both the document and the System.Drawing font are disposable;
            // release them once the file has been written.
            using (PdfDocument document = new PdfDocument())
            using (Font baseFont = new Font("楷体", 15f))
            {
                // Page margins: 2.54 cm top/bottom and 3.17 cm left/right, converted to points.
                PdfUnitConvertor unitCvtr = new PdfUnitConvertor();
                PdfMargins margins = new PdfMargins();
                margins.Top = unitCvtr.ConvertUnits(2.54f, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point);
                margins.Bottom = margins.Top;
                margins.Left = unitCvtr.ConvertUnits(3.17f, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point);
                margins.Right = margins.Left;

                // Add a new A4 page with those margins.
                PdfPageBase page = document.Pages.Add(PdfPageSize.A4, margins);

                // Font and brush used for the text.
                PdfTrueTypeFont font = new PdfTrueTypeFont(baseFont, true);
                PdfBrush brush = PdfBrushes.Red;

                // Draw the text at position (15, 13).
                string text = ("《蝶恋花 送春》 楼外垂杨千万缕,欲系青春,少住春还去。犹自风前飘柳絮,随春且看归何处? 绿满山川闻杜宇,便作无情,莫也愁人苦。把酒送春春不语,黄昏却下潇潇雨。");
                page.Canvas.DrawString(text, font, brush, 15, 13);

                // Load the image and draw it at (15, 100) using its own width and height.
                PdfImage image = PdfImage.FromFile("img.jpg");
                float width = image.Width;
                float height = image.Height;
                page.Canvas.DrawImage(image, 15, 100, width, height);

                // Save the document.
                document.SaveToFile(OutputFileName);
            }

            // Open the saved file with its associated application. UseShellExecute
            // must be set explicitly: it defaults to false on .NET Core / .NET 5+,
            // where starting a non-executable file would otherwise throw.
            System.Diagnostics.ProcessStartInfo viewer = new System.Diagnostics.ProcessStartInfo(OutputFileName);
            viewer.UseShellExecute = true;
            System.Diagnostics.Process.Start(viewer);
        }
    }
}
执行程序后,在项目的输出目录(例如 C:\…\Visual Studio 2013\Projects\CreatePDF_PDF\CreatePDF_PDF\bin\Debug)下可查看生成的文档。可以看到生成的文档大小,如图: