package tools

import (
	"bytes"
	"context"
	"os"
	"strings"

	"baliance.com/gooxml/document"
	"github.com/google/go-tika/tika"
	"github.com/ledongthuc/pdf"
	"github.com/tealeg/xlsx"
)

// ReadPdfAll2 extracts the text of the file at path by sending it to a Tika
// server listening on http://127.0.0.1:9998 and passing the response through
// TrimHtml.
//
// It returns an empty string and the error if the file cannot be opened or
// the Tika request fails.
func ReadPdfAll2(path string) (string, error) {
	const tikaServerURL = "http://127.0.0.1:9998"

	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	// Register the close only after a successful open, so we never defer a
	// Close on a nil *os.File.
	defer f.Close()

	client := tika.NewClient(nil, tikaServerURL)
	res, err := client.Parse(context.TODO(), f)
	if err != nil {
		return "", err
	}
	return TrimHtml(res), nil
}

// ReadExcelAll reads every sheet of the Excel file at excelPath and returns
// one string per non-empty row, in sheet order then row order. Each string is
// the concatenation of the row's cell values with no separator.
//
// On success the returned slice is always non-nil, even when the workbook
// contains no text.
func ReadExcelAll(excelPath string) ([]string, error) {
	xlFile, err := xlsx.OpenFile(excelPath)
	if err != nil {
		return nil, err
	}

	// Kept as a non-nil empty slice so callers (e.g. JSON encoders) observe
	// the same value as before when nothing is found.
	texts := make([]string, 0)
	var line strings.Builder
	for _, sheet := range xlFile.Sheets {
		for _, row := range sheet.Rows {
			line.Reset()
			for _, cell := range row.Cells {
				line.WriteString(cell.Value)
			}
			// Skip rows whose cells are all empty.
			if line.Len() == 0 {
				continue
			}
			texts = append(texts, line.String())
		}
	}
	return texts, nil
}

// ReadDocxAll opens the .docx file at fileName and returns the text of all
// runs of all paragraphs returned by doc.Paragraphs(), concatenated with no
// separator between runs or paragraphs.
func ReadDocxAll(fileName string) (string, error) {
	doc, err := document.Open(fileName)
	if err != nil {
		return "", err
	}

	var text strings.Builder
	for _, para := range doc.Paragraphs() {
		// A run is a span of text within a paragraph that shares the same
		// formatting.
		for _, run := range para.Runs() {
			text.WriteString(run.Text())
		}
	}
	return text.String(), nil
}

// ReadPdfAll returns the plain text of the PDF at path, passed through
// TrimHtml.
//
// It first tries the in-process PDF reader. If the file cannot be opened or
// parsed, or its text cannot be read, it falls back to ReadPdfAll2 (Tika) and
// returns whatever that yields, including its error.
func ReadPdfAll(path string) (string, error) {
	f, r, err := pdf.Open(path)
	if err != nil {
		return ReadPdfAll2(path)
	}
	// Register the close only after a successful open.
	defer f.Close()

	plain, err := r.GetPlainText()
	if err != nil {
		return ReadPdfAll2(path)
	}

	var buf bytes.Buffer
	if _, err := buf.ReadFrom(plain); err != nil {
		// Treat a failed read like any other extraction failure instead of
		// silently returning truncated text.
		return ReadPdfAll2(path)
	}
	return TrimHtml(buf.String()), nil
}