// Copyright 2014 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package dwarf

// This file implements the mapping from PC to lines.
// TODO: Find a way to test this properly.

// http://www.dwarfstd.org/doc/DWARF4.pdf Section 6.2 page 108

import (
	"fmt"
	"sort"
	"strings"
)

// PCToLine maps a program counter to the source file and line of the
// instruction at that address.
// An error is returned when there is no line table, when pc does not fall
// inside a known sequence of instructions, or when the matching row names a
// file number that is not present in d.sourceFiles.
func (d *Data) PCToLine(pc uint64) (file string, line uint64, err error) {
	entries := d.pcToLineEntries
	if len(entries) == 0 {
		return "", 0, fmt.Errorf("PCToLine: no line table")
	}
	// after is the index of the first entry whose pc exceeds the query pc
	// (sort.Search assumes the entries are ordered by pc), so the candidate
	// row is the one just before it.
	after := sort.Search(len(entries), func(k int) bool { return pc < entries[k].pc })
	if after == 0 {
		// Every pc in the table is larger than the query pc.
		return "", 0, fmt.Errorf("no source line defined for PC %#x", pc)
	}
	row := entries[after-1]
	switch {
	case row.file == 0,
		// row marks the end of a sequence of instructions.
		after == len(entries) && pc-row.pc > 1024:
		// row is the last row of the table and is not an end-of-sequence
		// marker, so the extent of its instruction is unknown; a query pc
		// this far beyond it is probably past that instruction.
		return "", 0, fmt.Errorf("no source line defined for PC %#x", pc)
	case row.file >= uint64(len(d.sourceFiles)):
		return "", 0, fmt.Errorf("invalid file number in DWARF data")
	}
	return d.sourceFiles[row.file], row.line, nil
}

// LineToBreakpointPCs returns the PCs that should be used as breakpoints for
// the given file and line number.
//
// file is matched against the source file names recorded in the DWARF data;
// an error is returned if no recorded name shares at least its final path
// component with file. If the file is found but has no entry for line, the
// result is an empty slice and a nil error.
func (d *Data) LineToBreakpointPCs(file string, line uint64) ([]uint64, error) {
	compDir := d.compilationDirectory()

	// Pick the recorded file that best matches the requested name:
	//   1. most matching trailing path components;
	//   2. on a tie, a file under the compilation directory;
	//   3. on a further tie, the shortest name.
	// TODO: handle duplicate file names in the DWARF?
	var (
		bestNum     uint64 // Index of the best file in the DWARF data.
		bestMatched int    // Number of matching path components.
		bestLen     int    // Length of the best file's name.
		bestUnder   bool   // Best file is under the compilation directory.
	)
	for num, name := range d.sourceFiles {
		matched := matchingPathComponentSuffixSize(name, file)
		under := strings.HasPrefix(name, compDir)
		var wins bool
		switch {
		case matched != bestMatched:
			wins = matched > bestMatched
		case under != bestUnder:
			wins = under
		default:
			wins = len(name) < bestLen
		}
		if !wins {
			continue
		}
		bestNum, bestMatched, bestLen, bestUnder = uint64(num), matched, len(name), under
	}
	if bestMatched == 0 {
		return nil, fmt.Errorf("couldn't find file %q", file)
	}

	// rows holds all (pc, line) pairs for the chosen file; the binary
	// searches below assume it is ordered by line.
	rows := d.lineToPCEntries[bestNum]
	lo := sort.Search(len(rows), func(k int) bool { return rows[k].line >= line })
	hi := sort.Search(len(rows), func(k int) bool { return rows[k].line > line })
	// rows[lo:hi] is exactly the run of rows whose line equals the request.
	pcs := make([]uint64, 0, hi-lo)
	for _, r := range rows[lo:hi] {
		pcs = append(pcs, r.pc)
	}
	return pcs, nil
}

// compilationDirectory walks the entries of d until it reaches the first
// compilation unit entry and returns that entry's compilation directory
// attribute.
// It returns the empty string if reading fails, if the entries run out before
// a compilation unit is seen, or if the attribute is not a string.
func (d *Data) compilationDirectory() string {
	reader := d.Reader()
	for entry, err := reader.Next(); entry != nil && err == nil; entry, err = reader.Next() {
		if entry.Tag != TagCompileUnit {
			continue
		}
		// A missing or non-string attribute leaves dir as "".
		dir, _ := entry.Val(AttrCompDir).(string)
		return dir
	}
	return ""
}

// matchingPathComponentSuffixSize reports how many trailing "/"-separated
// components the paths p1 and p2 have in common, i.e. the largest n such that
// the last n components of both paths are equal.
// e.g. matchingPathComponentSuffixSize("a/b/x/y.go", "b/a/x/y.go") returns 2.
func matchingPathComponentSuffixSize(p1, p2 string) int {
	// TODO: deal with other path separators.
	rest1, rest2 := p1, p2
	for n := 0; ; {
		// Peel the final component off each path and compare them.
		cut1 := strings.LastIndexByte(rest1, '/')
		cut2 := strings.LastIndexByte(rest2, '/')
		if rest1[cut1+1:] != rest2[cut2+1:] {
			return n
		}
		n++
		if cut1 < 0 || cut2 < 0 {
			// One of the paths has no components left.
			return n
		}
		rest1, rest2 = rest1[:cut1], rest2[:cut2]
	}
}

// Standard opcodes. Figure 37, page 178.
// The values are consecutive, starting at 0x01.
// If an opcode >= lineMachine.header.opcodeBase, it is a special
// opcode rather than the opcode defined in this table.
const (
	lineStdCopy             = iota + 1 // 0x01
	lineStdAdvancePC                   // 0x02
	lineStdAdvanceLine                 // 0x03
	lineStdSetFile                     // 0x04
	lineStdSetColumn                   // 0x05
	lineStdNegateStmt                  // 0x06
	lineStdSetBasicBlock               // 0x07
	lineStdConstAddPC                  // 0x08
	lineStdFixedAdvancePC              // 0x09
	lineStdSetPrologueEnd              // 0x0a
	lineStdSetEpilogueBegin            // 0x0b
	lineStdSetISA                      // 0x0c
)

// Extended opcodes. Figure 38, page 179.
// An extended opcode follows a lineStartExtendedOpcode byte and a length.
const (
	lineStartExtendedOpcode = iota // 0x00; not defined as a named constant in the spec.
	lineExtEndSequence             // 0x01
	lineExtSetAddress              // 0x02
	lineExtDefineFile              // 0x03
	lineExtSetDiscriminator        // 0x04; new in version 4.
	lineExtLoUser           = 0x80
	lineExtHiUser           = 0xff
)

// lineHeader holds the information stored in the header of the line table for a
// single compilation unit. The fields are declared in the order in which
// parseHeader reads them.
// Section 6.2.4, page 112.
type lineHeader struct {
	unitLength           int        // read as a 32-bit value; the 32-bit DWARF format is assumed
	version              int        // line table version; selects whether maxOpsPerInstruction is stored
	headerLength         int        // header_length value as stored in the header
	minInstructionLength int        // multiplier applied to every address advance
	maxOpsPerInstruction int        // read from the header when version >= 4, otherwise set to 1
	defaultIsStmt        bool       // initial value of the isStmt register
	lineBase             int        // signed; the line advance of a special opcode starts from this value
	lineRange            int        // divisor that splits a special opcode into line and address advances
	opcodeBase           byte       // opcodes >= opcodeBase are special opcodes
	stdOpcodeLengths     []byte     // opcodeBase-1 bytes copied verbatim from the header
	include              []string   // entry 0 is empty; means current directory
	file                 []lineFile // entry 0 is empty.
}

// lineFile represents a file name stored in the PC/line table, usually in the header.
// parseHeader creates one lineFile per file entry of the header: the name is
// the entry's string, and index, time and length are the three unsigned
// values that follow it, in that order.
type lineFile struct {
	name   string
	index  int // index into include directories
	time   int // implementation-defined time of last modification
	length int // length in bytes, 0 if not available.
}

// lineMachine holds the registers that are updated while the PC/line mapping
// engine executes a line number program.
// Section 6.2.2, page 109.
//
// reset puts the registers into their initial state. evalCompilationUnit
// updates them opcode by opcode and passes the machine to its callback every
// time a row of the line table is produced.
type lineMachine struct {
	// The program-counter value corresponding to a machine instruction generated by the compiler.
	// Initial value: 0.
	address uint64

	// An unsigned integer representing the index of an operation within a VLIW
	// instruction. The index of the first operation is 0. For non-VLIW
	// architectures, this register will always be 0.
	// The address and op_index registers, taken together, form an operation
	// pointer that can reference any individual operation within the
	// instruction stream.
	// Initial value: 0.
	opIndex uint64

	// An unsigned integer indicating the identity of the source file corresponding to a machine instruction.
	// Initial value: 1.
	file uint64

	// An unsigned integer indicating a source line number. Lines are numbered
	// beginning at 1. The compiler may emit the value 0 in cases where an
	// instruction cannot be attributed to any source line.
	// Initial value: 1.
	line uint64

	// An unsigned integer indicating a column number within a source line.
	// Columns are numbered beginning at 1. The value 0 is reserved to indicate
	// that a statement begins at the “left edge” of the line.
	// Initial value: 0.
	column uint64

	// A boolean indicating that the current instruction is a recommended
	// breakpoint location. A recommended breakpoint location is intended to
	// “represent” a line, a statement and/or a semantically distinct subpart of a
	// statement.
	// Initial value: header.defaultIsStmt.
	isStmt bool

	// A boolean indicating that the current instruction is the beginning of a basic
	// block.
	// Initial value: false.
	basicBlock bool

	// A boolean indicating that the current address is that of the first byte after
	// the end of a sequence of target machine instructions. end_sequence
	// terminates a sequence of lines; therefore other information in the same
	// row is not meaningful.
	// Initial value: false.
	endSequence bool

	// A boolean indicating that the current address is one (of possibly many)
	// where execution should be suspended for an entry breakpoint of a
	// function.
	// Initial value: false.
	prologueEnd bool

	// A boolean indicating that the current address is one (of possibly many)
	// where execution should be suspended for an exit breakpoint of a function.
	// Initial value: false.
	epilogueBegin bool

	// An unsigned integer whose value encodes the applicable instruction set
	// architecture for the current instruction.
	// The encoding of instruction sets should be shared by all users of a given
	// architecture. It is recommended that this encoding be defined by the ABI
	// authoring committee for each architecture.
	// Initial value: 0.
	isa uint64

	// An unsigned integer identifying the block to which the current instruction
	// belongs. Discriminator values are assigned arbitrarily by the DWARF
	// producer and serve to distinguish among multiple blocks that may all be
	// associated with the same source file, line, and column. Where only one
	// block exists for a given source position, the discriminator value should be
	// zero.
	// Initial value: 0.
	discriminator uint64

	// The header for the current compilation unit.
	// Not an actual register, but stored here for cleanliness.
	// It is filled in by parseHeader and left untouched by reset.
	header lineHeader
}

// parseHeader reads the line table header of one compilation unit from b and
// stores the result in m.header, replacing any previous header.
//
// It returns an error if the unit length exceeds the data remaining in b, or
// if the header contains a value that would make the line number program
// impossible to evaluate: a zero line_range or zero
// maximum_operations_per_instruction (both are used as divisors when
// advancing the address and line registers) or a zero opcode_base (the
// standard opcode length table has opcode_base-1 entries).
func (m *lineMachine) parseHeader(b *buf) error {
	m.header = lineHeader{}
	m.header.unitLength = int(b.uint32()) // Note: We are assuming 32-bit DWARF format.
	if m.header.unitLength > len(b.data) {
		return fmt.Errorf("DWARF: bad PC/line header length")
	}
	m.header.version = int(b.uint16())
	m.header.headerLength = int(b.uint32())
	m.header.minInstructionLength = int(b.uint8())
	if m.header.version >= 4 {
		m.header.maxOpsPerInstruction = int(b.uint8())
	} else {
		// The field does not exist before version 4.
		m.header.maxOpsPerInstruction = 1
	}
	m.header.defaultIsStmt = b.uint8() != 0
	m.header.lineBase = int(int8(b.uint8())) // line_base is a signed byte.
	m.header.lineRange = int(b.uint8())
	m.header.opcodeBase = b.uint8()

	// Reject values that would otherwise cause a division by zero later on,
	// or make the opcode length table size wrap around.
	if m.header.maxOpsPerInstruction == 0 {
		return fmt.Errorf("DWARF: invalid maximum operations per instruction: 0")
	}
	if m.header.lineRange == 0 {
		return fmt.Errorf("DWARF: invalid line range: 0")
	}
	if m.header.opcodeBase == 0 {
		return fmt.Errorf("DWARF: invalid opcode base: 0")
	}

	// One length byte per standard opcode, i.e. for opcodes 1..opcodeBase-1.
	numStdOpcodes := int(m.header.opcodeBase) - 1
	m.header.stdOpcodeLengths = make([]byte, numStdOpcodes)
	copy(m.header.stdOpcodeLengths, b.bytes(numStdOpcodes))

	// Includes: a list of strings terminated by an empty string.
	m.header.include = make([]string, 1) // First entry is empty; file index entries are 1-indexed.
	for {
		name := b.string()
		if name == "" {
			break
		}
		m.header.include = append(m.header.include, name)
	}

	// Files: a list of (name, directory index, time, length) entries
	// terminated by an empty name.
	m.header.file = make([]lineFile, 1, 10) // entries are 1-indexed in line number program.
	for {
		name := b.string()
		if name == "" {
			break
		}
		index := b.uint()
		time := b.uint()
		length := b.uint()
		m.header.file = append(m.header.file, lineFile{
			name:   name,
			index:  int(index),
			time:   int(time),
			length: int(length),
		})
	}
	return nil
}

// Special opcodes, page 117.
// There are seven steps to processing special opcodes.  We break them up here
// because the caller needs to output a row between steps 2 and 4, and because
// we need to perform just step 2 for the opcode DW_LNS_const_add_pc.

// specialOpcodeStep1 performs step 1 of a special opcode: it adds the
// opcode's line advance, which may be negative, to the line register.
func (m *lineMachine) specialOpcodeStep1(opcode byte) {
	hdr := &m.header
	adjusted := int(opcode - hdr.opcodeBase)
	// A negative advance wraps around in the unsigned addition, which
	// subtracts from the line register.
	m.line += uint64(hdr.lineBase + adjusted%hdr.lineRange)
}

// specialOpcodeStep2 performs step 2 of a special opcode: it advances the
// address and opIndex registers by the opcode's operation advance.
// It is also called with opcode 255 to implement DW_LNS_const_add_pc.
func (m *lineMachine) specialOpcodeStep2(opcode byte) {
	hdr := &m.header
	opAdvance := int(opcode-hdr.opcodeBase) / hdr.lineRange
	// Whole instructions crossed once the operation index is taken into account.
	instructions := (int(m.opIndex) + opAdvance) / hdr.maxOpsPerInstruction
	m.address += uint64(hdr.minInstructionLength * instructions)
	m.opIndex = (m.opIndex + uint64(opAdvance)) % uint64(hdr.maxOpsPerInstruction)
}

// specialOpcodeSteps4To7 performs the steps of a special opcode that follow
// the output of a row: it clears the basicBlock, prologueEnd and
// epilogueBegin flags and sets the discriminator back to zero.
func (m *lineMachine) specialOpcodeSteps4To7() {
	m.basicBlock, m.prologueEnd, m.epilogueBegin = false, false, false
	m.discriminator = 0
}

// evalCompilationUnit runs the line number program in b from the machine's
// initial register state and calls f each time the program outputs a row of
// the line table.
//
// Execution stops with a nil error when f returns false, or when an
// end_sequence row has been output and no data remains in b. An error is
// returned if b runs out of data without a final end_sequence, or if the
// program contains an opcode that cannot be handled.
//
// m.header must describe the unit being evaluated: it supplies opcodeBase, the
// operand counts of the standard opcodes, and the parameters used to advance
// the address and line registers.
func (m *lineMachine) evalCompilationUnit(b *buf, f func(m *lineMachine) (cont bool)) error {
	m.reset()
	for len(b.data) > 0 {
		op := b.uint8()
		if op >= m.header.opcodeBase {
			// Special opcode.
			m.specialOpcodeStep1(op)
			m.specialOpcodeStep2(op)
			// Step 3 is to output a row, so we call f here.
			if !f(m) {
				return nil
			}
			m.specialOpcodeSteps4To7()
			continue
		}
		switch op {
		case lineStartExtendedOpcode:
			if len(b.data) == 0 {
				return fmt.Errorf("DWARF: short extended opcode (1)")
			}
			size := b.uint()
			if uint64(len(b.data)) < size {
				return fmt.Errorf("DWARF: short extended opcode (2)")
			}
			op = b.uint8()
			switch op {
			case lineExtEndSequence:
				m.endSequence = true
				if !f(m) {
					return nil
				}
				if len(b.data) == 0 {
					return nil
				}
				// More data follows: start a new sequence.
				m.reset()
			case lineExtSetAddress:
				m.address = b.addr()
				m.opIndex = 0
			case lineExtDefineFile:
				return fmt.Errorf("DWARF: unimplemented define_file op")
			case lineExtSetDiscriminator:
				m.discriminator = b.uint()
			default:
				return fmt.Errorf("DWARF: unknown extended opcode %#x", op)
			}
		case lineStdCopy:
			if !f(m) {
				return nil
			}
			m.discriminator = 0
			m.basicBlock = false
			m.prologueEnd = false
			m.epilogueBegin = false
		case lineStdAdvancePC:
			// Only the address and op_index registers change here; flags
			// such as prologueEnd must survive until the next row is output.
			advance := b.uint()
			delta := (int(m.opIndex) + int(advance)) / m.header.maxOpsPerInstruction
			m.address += uint64(m.header.minInstructionLength * delta)
			m.opIndex = (m.opIndex + uint64(advance)) % uint64(m.header.maxOpsPerInstruction)
		case lineStdAdvanceLine:
			advance := b.int()
			m.line = uint64(int64(m.line) + advance)
		case lineStdSetFile:
			m.file = b.uint()
		case lineStdSetColumn:
			m.column = b.uint()
		case lineStdNegateStmt:
			m.isStmt = !m.isStmt
		case lineStdSetBasicBlock:
			m.basicBlock = true
		case lineStdFixedAdvancePC:
			m.address += uint64(b.uint16())
			m.opIndex = 0
		case lineStdSetPrologueEnd:
			m.prologueEnd = true
		case lineStdSetEpilogueBegin:
			m.epilogueBegin = true
		case lineStdSetISA:
			m.isa = b.uint()
		case lineStdConstAddPC:
			// Update the address and op_index registers by the amount a
			// special opcode 255 would, without touching the line register.
			m.specialOpcodeStep2(255)
		default:
			// A standard opcode that is not handled above. This happens when
			// the header's opcodeBase is larger than the set of opcodes known
			// here. The header records how many unsigned LEB128 operands each
			// standard opcode takes, so skip them and carry on.
			idx := int(op) - 1 // stdOpcodeLengths[0] belongs to opcode 1.
			if idx >= len(m.header.stdOpcodeLengths) {
				return fmt.Errorf("DWARF: missing operand count for standard opcode %#x", op)
			}
			for n := m.header.stdOpcodeLengths[idx]; n > 0; n-- {
				b.uint()
			}
		}
	}
	return fmt.Errorf("DWARF: unexpected end of line number information")
}

// reset puts every register of the machine back into its initial state
// (page 111): file and line start at 1, isStmt takes the header's default,
// and all other registers are zero or false. The header itself is kept.
func (m *lineMachine) reset() {
	*m = lineMachine{
		file:   1,
		line:   1,
		isStmt: m.header.defaultIsStmt,
		header: m.header,
	}
}