Beego/vendor/github.com/beego/x2j/x2m_bulk.go

202 lines
5.4 KiB
Go

// Copyright 2012-2013 Charles Banning. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file
// x2m_bulk.go: Process files with multiple XML messages.
package x2j
import (
"bytes"
"encoding/xml"
"errors"
"io"
"os"
"regexp"
"sync"
)
// XmlMsgsFromFile()
// 'fname' is name of file
// 'phandler' is the map processing handler. Return of 'false' stops further processing.
// 'ehandler' is the parsing error handler. Return of 'false' stops further processing and returns error.
// Note: phandler() and ehandler() calls are blocking, so reading and processing of messages is serialized.
// This means that you can stop reading the file on error or after processing a particular message.
// To have reading and handling run concurrently, pass arguments to a go routine in handler and return true.
func XmlMsgsFromFile(fname string, phandler func(map[string]interface{})(bool), ehandler func(error)(bool), recast ...bool) error {
var r bool
if len(recast) == 1 {
r = recast[0]
}
fi, fierr := os.Stat(fname)
if fierr != nil {
return fierr
}
fh, fherr := os.Open(fname)
if fherr != nil {
return fherr
}
defer fh.Close()
buf := make([]byte,fi.Size())
_, rerr := fh.Read(buf)
if rerr != nil {
return rerr
}
doc := string(buf)
// xml.Decoder doesn't properly handle whitespace in some doc
// see songTextString.xml test case ...
reg,_ := regexp.Compile("[ \t\n\r]*<")
doc = reg.ReplaceAllString(doc,"<")
b := bytes.NewBufferString(doc)
for {
m, merr := XmlBufferToMap(b,r)
if merr != nil && merr != io.EOF {
if ok := ehandler(merr); !ok {
// caused reader termination
return merr
}
}
if m != nil {
if ok := phandler(m); !ok {
break
}
}
if merr == io.EOF {
break
}
}
return nil
}
// XmlBufferToMap - process XML message from a bytes.Buffer
// 'b' is the buffer
// Optional argument 'recast' coerces map values to float64 or bool where possible.
func XmlBufferToMap(b *bytes.Buffer,recast ...bool) (map[string]interface{},error) {
var r bool
if len(recast) == 1 {
r = recast[0]
}
n,err := XmlBufferToTree(b)
if err != nil {
return nil, err
}
m := make(map[string]interface{})
m[n.key] = n.treeToMap(r)
return m,nil
}
// BufferToTree - derived from DocToTree()
func XmlBufferToTree(b *bytes.Buffer) (*Node, error) {
p := xml.NewDecoder(b)
p.CharsetReader = X2jCharsetReader
n, berr := xmlToTree("",nil,p)
if berr != nil {
return nil, berr
}
return n,nil
}
// XmlBuffer - create XML decoder buffer for a string from anywhere, not necessarily a file.
type XmlBuffer struct {
cnt uint64
str *string
buf *bytes.Buffer
}
var mtx sync.Mutex
var cnt uint64
var activeXmlBufs = make(map[uint64]*XmlBuffer)
// NewXmlBuffer() - creates a bytes.Buffer from a string with multiple messages
// Use Close() function to release the buffer for garbage collection.
func NewXmlBuffer(s string) *XmlBuffer {
// xml.Decoder doesn't properly handle whitespace in some doc
// see songTextString.xml test case ...
reg,_ := regexp.Compile("[ \t\n\r]*<")
s = reg.ReplaceAllString(s,"<")
b := bytes.NewBufferString(s)
buf := new(XmlBuffer)
buf.str = &s
buf.buf = b
mtx.Lock()
defer mtx.Unlock()
buf.cnt = cnt ; cnt++
activeXmlBufs[buf.cnt] = buf
return buf
}
// BytesNewXmlBuffer() - creates a bytes.Buffer from b with possibly multiple messages
// Use Close() function to release the buffer for garbage collection.
func BytesNewXmlBuffer(b []byte) *XmlBuffer {
bb := bytes.NewBuffer(b)
buf := new(XmlBuffer)
buf.buf = bb
mtx.Lock()
defer mtx.Unlock()
buf.cnt = cnt ; cnt++
activeXmlBufs[buf.cnt] = buf
return buf
}
// Close() - release the buffer address for garbage collection
func (buf *XmlBuffer)Close() {
mtx.Lock()
defer mtx.Unlock()
delete(activeXmlBufs,buf.cnt)
}
// NextMap() - retrieve next XML message in buffer as a map[string]interface{} value.
// The optional argument 'recast' will try and coerce values to float64 or bool as appropriate.
func (buf *XmlBuffer)NextMap(recast ...bool) (map[string]interface{}, error) {
var r bool
if len(recast) == 1 {
r = recast[0]
}
if _, ok := activeXmlBufs[buf.cnt]; !ok {
return nil, errors.New("Buffer is not active.")
}
return XmlBufferToMap(buf.buf,r)
}
// ============================= io.Reader version for stream processing ======================
// XmlMsgsFromReader() - io.Reader version of XmlMsgsFromFile
// 'rdr' is an io.Reader for an XML message (stream)
// 'phandler' is the map processing handler. Return of 'false' stops further processing.
// 'ehandler' is the parsing error handler. Return of 'false' stops further processing and returns error.
// Note: phandler() and ehandler() calls are blocking, so reading and processing of messages is serialized.
// This means that you can stop reading the file on error or after processing a particular message.
// To have reading and handling run concurrently, pass arguments to a go routine in handler and return true.
func XmlMsgsFromReader(rdr io.Reader, phandler func(map[string]interface{})(bool), ehandler func(error)(bool), recast ...bool) error {
var r bool
if len(recast) == 1 {
r = recast[0]
}
for {
m, merr := ToMap(rdr,r)
if merr != nil && merr != io.EOF {
if ok := ehandler(merr); !ok {
// caused reader termination
return merr
}
}
if m != nil {
if ok := phandler(m); !ok {
break
}
}
if merr == io.EOF {
break
}
}
return nil
}