Beego/vendor/github.com/beego/x2j/x2j.go

657 lines
19 KiB
Go

// Unmarshal dynamic / arbitrary XML docs and extract values (using wildcards, if necessary).
// Copyright 2012-2013 Charles Banning. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file
/*
Unmarshal dynamic / arbitrary XML docs and extract values (using wildcards, if necessary).
One useful function is:
- Unmarshal(doc []byte, v interface{}) error
where v is a pointer to a variable of type 'map[string]interface{}', 'string', or
any other type supported by xml.Unmarshal().
To retrieve a value for specific tag use:
- DocValue(doc, path string, attrs ...string) (interface{},error)
- MapValue(m map[string]interface{}, path string, attr map[string]interface{}, recast ...bool) (interface{}, error)
The 'path' argument is a period-separated tag hierarchy - also known as dot-notation.
It is the program's responsibility to cast the returned value to the proper type; possible
types are the normal JSON unmarshaling types: string, float64, bool, []interface, map[string]interface{}.
To retrieve all values associated with a tag occurring anywhere in the XML document use:
- ValuesForTag(doc, tag string) ([]interface{}, error)
- ValuesForKey(m map[string]interface{}, key string) []interface{}
Demos: http://play.golang.org/p/m8zP-cpk0O
http://play.golang.org/p/cIteTS1iSg
http://play.golang.org/p/vd8pMiI21b
Returned values should be one of map[string]interface, []interface{}, or string.
All the values assocated with a tag-path that may include one or more wildcard characters -
'*' - can also be retrieved using:
- ValuesFromTagPath(doc, path string, getAttrs ...bool) ([]interface{}, error)
- ValuesFromKeyPath(map[string]interface{}, path string, getAttrs ...bool) []interface{}
Demos: http://play.golang.org/p/kUQnZ8VuhS
http://play.golang.org/p/l1aMHYtz7G
NOTE: care should be taken when using "*" at the end of a path - i.e., "books.book.*". See
the x2jpath_test.go case on how the wildcard returns all key values and collapses list values;
the same message structure can load a []interface{} or a map[string]interface{} (or an interface{})
value for a tag.
See the test cases in "x2jpath_test.go" and programs in "example" subdirectory for more.
XML PARSING CONVENTIONS
- Attributes are parsed to map[string]interface{} values by prefixing a hyphen, '-',
to the attribute label.
- If the element is a simple element and has attributes, the element value
is given the key '#text' for its map[string]interface{} representation. (See
the 'atomFeedString.xml' test data, below.)
io.Reader HANDLING
ToTree(), ToMap(), ToJson(), and ToJsonIndent() provide parsing of messages from an io.Reader.
If you want to handle a message stream, look at XmlMsgsFromReader().
NON-UTF8 CHARACTER SETS
Use the X2jCharsetReader variable to assign io.Reader for alternative character sets.
*/
package x2j
import (
"bytes"
"encoding/json"
"encoding/xml"
"errors"
"fmt"
"io"
"regexp"
"strconv"
"strings"
)
// If X2jCharsetReader != nil, it will be used to decode the doc or stream if required
// import charset "code.google.com/p/go-charset/charset"
// ...
// x2j.X2jCharsetReader = charset.NewReader
// s, err := x2j.DocToJson(doc)
var X2jCharsetReader func(charset string, input io.Reader)(io.Reader, error)
type Node struct {
dup bool // is member of a list
attr bool // is an attribute
key string // XML tag
val string // element value
nodes []*Node
}
// DocToJson - return an XML doc as a JSON string.
// If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible.
func DocToJson(doc string, recast ...bool) (string, error) {
var r bool
if len(recast) == 1 {
r = recast[0]
}
m, merr := DocToMap(doc, r)
if m == nil || merr != nil {
return "", merr
}
b, berr := json.Marshal(m)
if berr != nil {
return "", berr
}
// NOTE: don't have to worry about safe JSON marshaling with json.Marshal, since '<' and '>" are reservedin XML.
return string(b), nil
}
// DocToJsonIndent - return an XML doc as a prettified JSON string.
// If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible.
// Note: recasting is only applied to element values, not attribute values.
func DocToJsonIndent(doc string, recast ...bool) (string, error) {
var r bool
if len(recast) == 1 {
r = recast[0]
}
m, merr := DocToMap(doc, r)
if m == nil || merr != nil {
return "", merr
}
b, berr := json.MarshalIndent(m, "", " ")
if berr != nil {
return "", berr
}
// NOTE: don't have to worry about safe JSON marshaling with json.Marshal, since '<' and '>" are reservedin XML.
return string(b), nil
}
// DocToMap - convert an XML doc into a map[string]interface{}.
// (This is analogous to unmarshalling a JSON string to map[string]interface{} using json.Unmarshal().)
// If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible.
// Note: recasting is only applied to element values, not attribute values.
func DocToMap(doc string, recast ...bool) (map[string]interface{}, error) {
var r bool
if len(recast) == 1 {
r = recast[0]
}
n, err := DocToTree(doc)
if err != nil {
return nil, err
}
m := make(map[string]interface{})
m[n.key] = n.treeToMap(r)
return m, nil
}
// DocToTree - convert an XML doc into a tree of nodes.
func DocToTree(doc string) (*Node, error) {
// xml.Decoder doesn't properly handle whitespace in some doc
// see songTextString.xml test case ...
reg, _ := regexp.Compile("[ \t\n\r]*<")
doc = reg.ReplaceAllString(doc, "<")
b := bytes.NewBufferString(doc)
p := xml.NewDecoder(b)
p.CharsetReader = X2jCharsetReader
n, berr := xmlToTree("", nil, p)
if berr != nil {
return nil, berr
}
return n, nil
}
// (*Node)WriteTree - convert a tree of nodes into a printable string.
// 'padding' is the starting indentation; typically: n.WriteTree().
func (n *Node) WriteTree(padding ...int) string {
var indent int
if len(padding) == 1 {
indent = padding[0]
}
var s string
if n.val != "" {
for i := 0; i < indent; i++ {
s += " "
}
s += n.key + " : " + n.val + "\n"
} else {
for i := 0; i < indent; i++ {
s += " "
}
s += n.key + " :" + "\n"
for _, nn := range n.nodes {
s += nn.WriteTree(indent + 1)
}
}
return s
}
// xmlToTree - load a 'clean' XML doc into a tree of *Node.
func xmlToTree(skey string, a []xml.Attr, p *xml.Decoder) (*Node, error) {
n := new(Node)
n.nodes = make([]*Node, 0)
if skey != "" {
n.key = skey
if len(a) > 0 {
for _, v := range a {
na := new(Node)
na.attr = true
na.key = `-` + v.Name.Local
na.val = v.Value
n.nodes = append(n.nodes, na)
}
}
}
for {
t, err := p.Token()
if err != nil {
return nil, err
}
switch t.(type) {
case xml.StartElement:
tt := t.(xml.StartElement)
// handle root
if n.key == "" {
n.key = tt.Name.Local
if len(tt.Attr) > 0 {
for _, v := range tt.Attr {
na := new(Node)
na.attr = true
na.key = `-` + v.Name.Local
na.val = v.Value
n.nodes = append(n.nodes, na)
}
}
} else {
nn, nnerr := xmlToTree(tt.Name.Local, tt.Attr, p)
if nnerr != nil {
return nil, nnerr
}
n.nodes = append(n.nodes, nn)
}
case xml.EndElement:
// scan n.nodes for duplicate n.key values
n.markDuplicateKeys()
return n, nil
case xml.CharData:
tt := string(t.(xml.CharData))
if len(n.nodes) > 0 {
nn := new(Node)
nn.key = "#text"
nn.val = tt
n.nodes = append(n.nodes, nn)
} else {
n.val = tt
}
default:
// noop
}
}
// Logically we can't get here, but provide an error message anyway.
return nil, errors.New("Unknown parse error in xmlToTree() for: " + n.key)
}
// (*Node)markDuplicateKeys - set node.dup flag for loading map[string]interface{}.
func (n *Node) markDuplicateKeys() {
l := len(n.nodes)
for i := 0; i < l; i++ {
if n.nodes[i].dup {
continue
}
for j := i + 1; j < l; j++ {
if n.nodes[i].key == n.nodes[j].key {
n.nodes[i].dup = true
n.nodes[j].dup = true
}
}
}
}
// (*Node)treeToMap - convert a tree of nodes into a map[string]interface{}.
// (Parses to map that is structurally the same as from json.Unmarshal().)
// Note: root is not instantiated; call with: "m[n.key] = n.treeToMap(recast)".
func (n *Node) treeToMap(r bool) interface{} {
if len(n.nodes) == 0 {
return recast(n.val, r)
}
m := make(map[string]interface{}, 0)
for _, v := range n.nodes {
// just a value
if !v.dup && len(v.nodes) == 0 {
m[v.key] = recast(v.val, r)
continue
}
// a list of values
if v.dup {
var a []interface{}
if vv, ok := m[v.key]; ok {
a = vv.([]interface{})
} else {
a = make([]interface{}, 0)
}
a = append(a, v.treeToMap(r))
m[v.key] = interface{}(a)
continue
}
// it's a unique key
m[v.key] = v.treeToMap(r)
}
return interface{}(m)
}
// recast - try to cast string values to bool or float64
func recast(s string, r bool) interface{} {
if r {
// handle numeric strings ahead of boolean
if f, err := strconv.ParseFloat(s, 64); err == nil {
return interface{}(f)
}
// ParseBool treats "1"==true & "0"==false
if b, err := strconv.ParseBool(s); err == nil {
return interface{}(b)
}
}
return interface{}(s)
}
// WriteMap - dumps the map[string]interface{} for examination.
// 'offset' is initial indentation count; typically: WriteMap(m).
// NOTE: with XML all element types are 'string'.
// But code written as generic for use with maps[string]interface{} values from json.Unmarshal().
// Or it can handle a DocToMap(doc,true) result where values have been recast'd.
func WriteMap(m interface{}, offset ...int) string {
var indent int
if len(offset) == 1 {
indent = offset[0]
}
var s string
switch m.(type) {
case nil:
return "[nil] nil"
case string:
return "[string] " + m.(string)
case float64:
return "[float64] " + strconv.FormatFloat(m.(float64), 'e', 2, 64)
case bool:
return "[bool] " + strconv.FormatBool(m.(bool))
case []interface{}:
s += "[[]interface{}]"
for i, v := range m.([]interface{}) {
s += "\n"
for i := 0; i < indent; i++ {
s += " "
}
s += "[item: " + strconv.FormatInt(int64(i), 10) + "]"
switch v.(type) {
case string, float64, bool:
s += "\n"
default:
// noop
}
for i := 0; i < indent; i++ {
s += " "
}
s += WriteMap(v, indent+1)
}
case map[string]interface{}:
for k, v := range m.(map[string]interface{}) {
s += "\n"
for i := 0; i < indent; i++ {
s += " "
}
// s += "[map[string]interface{}] "+k+" :"+WriteMap(v,indent+1)
s += k + " :" + WriteMap(v, indent+1)
}
default:
// shouldn't ever be here ...
s += fmt.Sprintf("unknown type for: %v", m)
}
return s
}
// ------------------------ value extraction from XML doc --------------------------
// DocValue - return a value for a specific tag
// 'doc' is a valid XML message.
// 'path' is a hierarchy of XML tags, e.g., "doc.name".
// 'attrs' is an OPTIONAL list of "name:value" pairs for attributes.
// Note: 'recast' is not enabled here. Use DocToMap(), NewAttributeMap(), and MapValue() calls for that.
func DocValue(doc, path string, attrs ...string) (interface{}, error) {
n, err := DocToTree(doc)
if err != nil {
return nil, err
}
m := make(map[string]interface{})
m[n.key] = n.treeToMap(false)
a, aerr := NewAttributeMap(attrs...)
if aerr != nil {
return nil, aerr
}
v, verr := MapValue(m, path, a)
if verr != nil {
return nil, verr
}
return v, nil
}
// MapValue - retrieves value based on walking the map, 'm'.
// 'm' is the map value of interest.
// 'path' is a period-separated hierarchy of keys in the map.
// 'attr' is a map of attribute "name:value" pairs from NewAttributeMap(). May be 'nil'.
// If the path can't be traversed, an error is returned.
// Note: the optional argument 'r' can be used to coerce attribute values, 'attr', if done so for 'm'.
func MapValue(m map[string]interface{}, path string, attr map[string]interface{}, r ...bool) (interface{}, error) {
// attribute values may have been recasted during map construction; default is 'false'.
if len(r) == 1 && r[0] == true {
for k, v := range attr {
attr[k] = recast(v.(string), true)
}
}
// parse the path
keys := strings.Split(path, ".")
// initialize return value to 'm' so a path of "" will work correctly
var v interface{} = m
var ok bool
var okey string
var isMap bool = true
if keys[0] == "" && len(attr) == 0 {
return v, nil
}
for _, key := range keys {
if !isMap {
return nil, errors.New("no keys beyond: " + okey)
}
if v, ok = m[key]; !ok {
return nil, errors.New("no key in map: " + key)
} else {
switch v.(type) {
case map[string]interface{}:
m = v.(map[string]interface{})
isMap = true
default:
isMap = false
}
}
// save 'key' for error reporting
okey = key
}
// match attributes; value is "#text" or nil
if attr == nil {
return v, nil
}
return hasAttributes(v, attr)
}
// hasAttributes() - interface{} equality works for string, float64, bool
func hasAttributes(v interface{}, a map[string]interface{}) (interface{}, error) {
switch v.(type) {
case []interface{}:
// run through all entries looking one with matching attributes
for _, vv := range v.([]interface{}) {
if vvv, vvverr := hasAttributes(vv, a); vvverr == nil {
return vvv, nil
}
}
return nil, errors.New("no list member with matching attributes")
case map[string]interface{}:
// do all attribute name:value pairs match?
nv := v.(map[string]interface{})
for key, val := range a {
if vv, ok := nv[key]; !ok {
return nil, errors.New("no attribute with name: " + key[1:])
} else if val != vv {
return nil, errors.New("no attribute key:value pair: " + fmt.Sprintf("%s:%v", key[1:], val))
}
}
// they all match; so return value associated with "#text" key.
if vv, ok := nv["#text"]; ok {
return vv, nil
} else {
// this happens when another element is value of tag rather than just a string value
return nv, nil
}
}
return nil, errors.New("no match for attributes")
}
// NewAttributeMap() - generate map of attributes=value entries as map["-"+string]string.
// 'kv' arguments are "name:value" pairs that appear as attributes, name="value".
// If len(kv) == 0, the return is (nil, nil).
func NewAttributeMap(kv ...string) (map[string]interface{}, error) {
if len(kv) == 0 {
return nil, nil
}
m := make(map[string]interface{}, 0)
for _, v := range kv {
vv := strings.Split(v, ":")
if len(vv) != 2 {
return nil, errors.New("attribute not \"name:value\" pair: " + v)
}
// attributes are stored as keys prepended with hyphen
m["-"+vv[0]] = interface{}(vv[1])
}
return m, nil
}
//------------------------- get values for key ----------------------------
// ValuesForTag - return all values in doc associated with 'tag'.
// Returns nil if the 'tag' does not occur in the doc.
// If there is an error encounted while parsing doc, that is returned.
// If you want values 'recast' use DocToMap() and ValuesForKey().
func ValuesForTag(doc, tag string) ([]interface{}, error) {
m, err := DocToMap(doc)
if err != nil {
return nil, err
}
return ValuesForKey(m, tag), nil
}
// ValuesForKey - return all values in map associated with 'key'
// Returns nil if the 'key' does not occur in the map
func ValuesForKey(m map[string]interface{}, key string) []interface{} {
ret := make([]interface{}, 0)
hasKey(m, key, &ret)
if len(ret) > 0 {
return ret
}
return nil
}
// hasKey - if the map 'key' exists append it to array
// if it doesn't do nothing except scan array and map values
func hasKey(iv interface{}, key string, ret *[]interface{}) {
switch iv.(type) {
case map[string]interface{}:
vv := iv.(map[string]interface{})
if v, ok := vv[key]; ok {
*ret = append(*ret, v)
}
for _, v := range iv.(map[string]interface{}) {
hasKey(v, key, ret)
}
case []interface{}:
for _, v := range iv.([]interface{}) {
hasKey(v, key, ret)
}
}
}
// ======== 2013.07.01 - x2j.Unmarshal, wraps xml.Unmarshal ==============
// Unmarshal - wraps xml.Unmarshal with handling of map[string]interface{}
// and string type variables.
// Usage: x2j.Unmarshal(doc,&m) where m of type map[string]interface{}
// x2j.Unmarshal(doc,&s) where s of type string (Overrides xml.Unmarshal().)
// x2j.Unmarshal(doc,&struct) - passed to xml.Unmarshal()
// x2j.Unmarshal(doc,&slice) - passed to xml.Unmarshal()
func Unmarshal(doc []byte, v interface{}) error {
switch v.(type) {
case *map[string]interface{}:
m, err := ByteDocToMap(doc)
vv := *v.(*map[string]interface{})
for k, v := range m {
vv[k] = v
}
return err
case *string:
s, err := ByteDocToJson(doc)
*(v.(*string)) = s
return err
default:
b := bytes.NewBuffer(doc)
p := xml.NewDecoder(b)
p.CharsetReader = X2jCharsetReader
return p.Decode(v)
// return xml.Unmarshal(doc, v)
}
return nil
}
// ByteDocToJson - return an XML doc as a JSON string.
// If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible.
func ByteDocToJson(doc []byte, recast ...bool) (string, error) {
var r bool
if len(recast) == 1 {
r = recast[0]
}
m, merr := ByteDocToMap(doc, r)
if m == nil || merr != nil {
return "", merr
}
b, berr := json.Marshal(m)
if berr != nil {
return "", berr
}
// NOTE: don't have to worry about safe JSON marshaling with json.Marshal, since '<' and '>" are reservedin XML.
return string(b), nil
}
// ByteDocToMap - convert an XML doc into a map[string]interface{}.
// (This is analogous to unmarshalling a JSON string to map[string]interface{} using json.Unmarshal().)
// If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible.
// Note: recasting is only applied to element values, not attribute values.
func ByteDocToMap(doc []byte, recast ...bool) (map[string]interface{}, error) {
var r bool
if len(recast) == 1 {
r = recast[0]
}
n, err := ByteDocToTree(doc)
if err != nil {
return nil, err
}
m := make(map[string]interface{})
m[n.key] = n.treeToMap(r)
return m, nil
}
// ByteDocToTree - convert an XML doc into a tree of nodes.
func ByteDocToTree(doc []byte) (*Node, error) {
// xml.Decoder doesn't properly handle whitespace in some doc
// see songTextString.xml test case ...
reg, _ := regexp.Compile("[ \t\n\r]*<")
doc = reg.ReplaceAll(doc, []byte("<"))
b := bytes.NewBuffer(doc)
p := xml.NewDecoder(b)
p.CharsetReader = X2jCharsetReader
n, berr := xmlToTree("", nil, p)
if berr != nil {
return nil, berr
}
return n, nil
}