initial commit - go-bgp - a collection of golang BGP tools to monitor, archive and serve

commit f36b04125906b8f654a46650d83d7e17caa1e8d5
Author: dsp <dsp@2f30.org>
Date:   Tue, 10 Feb 2015 19:15:16 -0700

initial commit

Diffstat:
A Makefile  | 12 ++++++++++++
A README  | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A archive/archive.go  | 607 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A bgp.go  | 1 +
A cmd/archive_server.go  | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
A doc/draft-ietf-grow-mrt-11.txt  | 1625 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A doc/rfc1771.txt  | 3195 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A doc/rfc4360.txt  | 675 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A doc/rfc4364.txt  | 2635 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A monitor/monitor_backend.go  | 22 ++++++++++++++++++++++
A monitor/monitor_backend_test.go  | 32 ++++++++++++++++++++++++++++++++
A mrt/mrt.go  | 377 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A mrt/mrt_test.go  | 118 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/mrt1  | 0 
A tests/mrt2  | 0 
A tests/mrt3  | 0

16 files changed, 9410 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
@@ -0,0 +1,12 @@
+# None of these targets names a file its recipe creates, so mark them phony;
+# otherwise a stray file called "test" or "clean" would silently disable them.
+.PHONY: all test allbin clean
+
+all: allbin
+
+# Run the unit tests of every package.
+test:
+	go test ./...
+
+# Build the archive_server binary in the current directory.
+allbin: cmd/archive_server.go
+	go build cmd/archive_server.go
+
+# Remove the built binary and the go build artifacts.
+clean:
+	rm -f archive_server
+	go clean
+
diff --git a/README b/README
@@ -0,0 +1,60 @@
+=====================================================================
+		go-bgp by DsP <dsp@2f30.org>
+=====================================================================
+[General]
+go-bgp is a collection of pure golang libraries and tools for:
+reading and writing MRT files
+parsing BGP messages
+exposing archived BGP messages in various formats over RESTful HTTP/2
+
+[Details]
+mrt/
+	This package lets you read MRT files from golang: open a file, wrap it
+	in a bufio.Scanner, set the scanner's split function to the provided
+	SplitMrt, and then call .Scan() to get the []byte of each MRT message.
+	
+
+archive/
+	This module scans hierarchical dated directories that contain
+	either XML-encoded files or MRT files. It then exposes an API
+	that accepts requests of the form
+	http://host:port/archive?start=YYYYMMDDHHMM&end=YYYYMMDDHHMM&type=mrt
+	or
+	http://host:port/archive/conf?range
+	http://host:port/archive/conf?files
+
+monitor/
+	bgp monitor
+
+tests/
+	contains data for unit tests
+
+doc/
+	relevant RFCs
+
+cmd/
+	executable programs
+
+[License]
+Copyright (c) 2015, dsp <dsp@2f30.org>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation and/or
+other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/archive/archive.go b/archive/archive.go
@@ -0,0 +1,607 @@
+package archive
+
+import (
+	"errors"
+	"fmt"
+	"log"
+	"net/http"
+	"net/url"
+	//"io/ioutil"
+	//"bytes"
+	"bufio"
+	"compress/bzip2"
+	mrt "go-bgp/mrt"
+	"io"
+	"os"
+	"path/filepath"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+	"unicode"
+)
+
+// HTTP method names that requestHandlerFunc dispatches on.
+const (
+	GET    = "GET"
+	PUT    = "PUT"
+	POST   = "POST"
+	DELETE = "DELETE"
+)
+
+// Errors delivered to clients through the reply channel.
+//
+// errbadreq:  the start/end query parameters are missing or not paired up.
+// errbaddate: a date failed to parse or end is earlier than start.
+// errempty:   the archive has no entry files.
+// errdate:    the requested range lies outside the archived dates.
+var (
+	errbadreq  = errors.New("malformed request")
+	errbaddate = errors.New("dates should be in a YYYYMMDDHHMM format and start should be earlier than end")
+	errempty   = errors.New("archive empty")
+	errdate    = errors.New("no such date in archive")
+)
+
+// Resource is something that can be served over HTTP, with one handler per
+// HTTP method. Each handler receives the parsed form values of the request
+// and returns the HTTP status code to send plus a channel of replies.
+// requestHandlerFunc writes every reply to the response until the channel
+// is closed; a nil channel means there is no body to write.
+type Resource interface {
+	Get(url.Values) (int, chan reply)
+	Put(url.Values) (int, chan reply)
+	Post(url.Values) (int, chan reply)
+	Delete(url.Values) (int, chan reply)
+}
+
+// Empty helper types meant to be embedded in a resource. Each one supplies
+// a single Resource method that answers 405, so a resource only has to
+// implement the HTTP methods it actually supports.
+type (
+	GetNotAllowed    struct{}
+	PutNotAllowed    struct{}
+	PostNotAllowed   struct{}
+	DeleteNotAllowed struct{}
+)
+
+// Get rejects GET requests: 405 Method Not Allowed and no reply channel.
+func (GetNotAllowed) Get(vals url.Values) (int, chan reply) {
+	return http.StatusMethodNotAllowed, nil
+}
+
+// Put rejects PUT requests: 405 Method Not Allowed and no reply channel.
+func (PutNotAllowed) Put(vals url.Values) (int, chan reply) {
+	return http.StatusMethodNotAllowed, nil
+}
+// Post rejects POST requests: 405 Method Not Allowed and no reply channel.
+func (PostNotAllowed) Post(vals url.Values) (int, chan reply) {
+	return http.StatusMethodNotAllowed, nil
+}
+// Delete rejects DELETE requests: 405 Method Not Allowed and no reply channel.
+func (DeleteNotAllowed) Delete(vals url.Values) (int, chan reply) {
+	return http.StatusMethodNotAllowed, nil
+}
+
+// API registers Resources with the default net/http mux (AddResource) and
+// serves them (Start). It carries no state of its own.
+type API struct{}
+
+// requestHandlerFunc adapts a Resource to an http.HandlerFunc.
+//
+// The request form is parsed and handed to the Resource method matching the
+// HTTP method. The returned status code is written first, then every reply
+// received on the returned channel is streamed to the client until the
+// resource closes the channel. Replies that carry an error are logged and
+// their text is written to the response instead of data.
+//
+// A request whose form cannot be parsed gets 400, and a method other than
+// GET/PUT/POST/DELETE gets 405.
+func (api *API) requestHandlerFunc(resource Resource) http.HandlerFunc {
+	return func(rw http.ResponseWriter, req *http.Request) {
+		if err := req.ParseForm(); err != nil {
+			log.Printf("could not parse request form:%s\n", err)
+			http.Error(rw, errbadreq.Error(), http.StatusBadRequest)
+			return
+		}
+		var (
+			datac chan reply
+			code  int
+		)
+		vals := req.Form
+		switch req.Method {
+		case GET:
+			code, datac = resource.Get(vals)
+		case PUT:
+			code, datac = resource.Put(vals)
+		case POST:
+			code, datac = resource.Post(vals)
+		case DELETE:
+			code, datac = resource.Delete(vals)
+		default:
+			// Without this, code stays 0 and WriteHeader panics on the
+			// invalid status code (e.g. for HEAD or OPTIONS requests).
+			code = http.StatusMethodNotAllowed
+		}
+		rw.WriteHeader(code)
+		if datac == nil { // no channel to get data from: nothing else to write
+			return
+		}
+		// Drain the channel in the handler goroutine itself; the loop ends
+		// when the resource closes the channel.
+		for r := range datac {
+			if r.err != nil {
+				log.Printf("Error in received from data channel:%s\n", r.err)
+				fmt.Fprintf(rw, "%s\n", r.err)
+				continue
+			}
+			rw.Write(r.data)
+		}
+	}
+}
+
+// AddResource mounts resource at path on the default net/http mux.
+func (api *API) AddResource(resource Resource, path string) {
+	handler := api.requestHandlerFunc(resource)
+	http.HandleFunc(path, handler)
+}
+
+// Start serves the resources registered on the default mux on the given
+// TCP port. It blocks for as long as the server runs; when the server
+// cannot listen or stops, the reason is logged before Start returns.
+func (api *API) Start(port int) {
+	addr := fmt.Sprintf(":%d", port)
+	// ListenAndServe always returns a non-nil error; dropping it would hide
+	// failures such as the port already being in use.
+	if err := http.ListenAndServe(addr, nil); err != nil {
+		log.Printf("http server on %s stopped: %s\n", addr, err)
+	}
+}
+
+// reply is one unit of output sent from a resource to the HTTP handler:
+// either a chunk of response data or an error to report to the client.
+type reply struct {
+	data []byte
+	err  error
+}
+
+// archive is a collection of files that can be scanned and queried by time.
+//
+// To perform a query asynchronously on possibly many files we fire multiple
+// goroutines that all write their results to the chan reply. The WaitGroup
+// tells the caller when every goroutine has finished, so that it knows when
+// to close the channel and end the HTTP transaction.
+//
+// visit has the signature of a filepath.WalkFunc; scan passes it to
+// filepath.Walk to collect the files that belong to the archive.
+type archive interface {
+	Query(time.Time, time.Time, chan reply, *sync.WaitGroup)
+	visit(string, os.FileInfo, error) error
+}
+
+// xmlstring holds an XML message together with its timestamp, both as text
+// (timestr) and parsed (time).
+// NOTE(review): nothing in this file constructs or uses xmlstring yet.
+type xmlstring struct {
+	timestr string
+	msg     string
+	time    time.Time
+}
+
+// String returns the raw XML message.
+func (x *xmlstring) String() string {
+	return x.msg
+}
+
+// archentryfile describes one file of the archive: its path, the start date
+// parsed from its file name, and its size in bytes.
+type archentryfile struct {
+	path  string
+	sdate time.Time
+	sz    int64
+}
+
+// timeentryslice implements sort.Interface, ordering entries by ascending
+// start date.
+type timeentryslice []archentryfile
+
+// Len returns the number of entries.
+func (p timeentryslice) Len() int {
+	return len(p)
+}
+
+// Less reports whether entry i starts before entry j.
+func (p timeentryslice) Less(i, j int) bool {
+	return p[i].sdate.Before(p[j].sdate)
+}
+
+// Swap exchanges entries i and j.
+func (p timeentryslice) Swap(i, j int) {
+	p[i], p[j] = p[j], p[i]
+}
+
+// fsarchive is the state shared by the file-system backed archives
+// (mrtarchive and xmlarchive embed it).
+type fsarchive struct {
+	// root directory walked by scan
+	rootpathstr    string
+	// sorted entries of the last finished scan; this is what queries read
+	entryfiles     *timeentryslice
+	// entries collected by visit while a scan is running
+	tempentryfiles timeentryslice
+	// NOTE(review): curyr/curmon/curday are only zero-initialised in this file
+	curyr          int
+	curmon         int
+	curday         int
+	// commands ("SCAN", "DUMPENTRIES", "STOP") consumed by Serve
+	reqchan        chan string
+	// set while a scan is in progress
+	scanning       bool
+	Scanwg         *sync.WaitGroup // expose it so callers are able to wait for scan to finish
+	// scan signals Serve on this channel once the walk is done
+	scanch         chan struct{}
+	// slack applied around file start dates when selecting files for a query;
+	// presumably the time span covered by one file — TODO confirm
+	timedelta      time.Duration
+	// substring that a path must contain for visit to consider the file
+	descriminator  string
+	// resource describing the archive contents (date range, file list)
+	conf           *fsarconf
+	//present the archive as a restful resource
+	PutNotAllowed
+	PostNotAllowed
+	DeleteNotAllowed
+}
+
+// mrtarchive is an fsarchive whose files contain MRT records
+// (optionally bzip2-compressed).
+type mrtarchive struct {
+	*fsarchive
+}
+
+// xmlarchive is an fsarchive whose files contain one XML message per line
+// (optionally bzip2-compressed).
+type xmlarchive struct {
+	*fsarchive
+}
+
+// fsarconf is a read-only resource that reports what an archive contains.
+// arfiles points at the archive's entry list and stays nil until the first
+// scan has finished.
+type fsarconf struct {
+	arfiles *timeentryslice
+	PutNotAllowed
+	PostNotAllowed
+	DeleteNotAllowed
+}
+
+// Get answers configuration queries about the archive.
+//
+// With a "range" parameter it replies with the start dates of the first and
+// last entry file (or errempty when there are none); with a "files"
+// parameter it replies with the base name of every entry file, one reply per
+// file. "range" wins when both are present.
+//
+// The channel is created here and handed back to the response writer, so the
+// replies are sent from a separate goroutine: sending before returning would
+// block, as the receiver is not ready yet. The goroutine closes the channel
+// when done so that the receiver can finish.
+func (fsc *fsarconf) Get(values url.Values) (int, chan reply) {
+	retc := make(chan reply)
+	go func() {
+		defer close(retc) //must close the chan to let the listener finish.
+		if fsc.arfiles == nil {
+			log.Printf("nil arfile in fsarconf. ignoring request\n")
+			return
+		}
+		_, wantrange := values["range"]
+		_, wantfiles := values["files"]
+		switch {
+		case wantrange:
+			entries := *fsc.arfiles
+			if len(entries) == 0 {
+				retc <- reply{data: nil, err: errempty}
+				return
+			}
+			first, last := entries[0].sdate, entries[len(entries)-1].sdate
+			retc <- reply{data: []byte(fmt.Sprintf("%s - %s\n", first, last)), err: nil}
+		case wantfiles:
+			for _, entry := range *fsc.arfiles {
+				name := filepath.Base(entry.path)
+				retc <- reply{data: []byte(fmt.Sprintf("%s\n", name)), err: nil}
+			}
+		}
+	}()
+	return http.StatusOK, retc
+}
+
+// GetImpl is the GET handler shared by the concrete archives.
+//
+// It expects the same number of "start" and "end" form values, each in
+// YYYYMMDDHHMM format. Every start/end pair is handed to ar.Query, which
+// streams its results on the returned channel; a pair that fails to parse or
+// whose end precedes its start yields an errbaddate reply instead. The
+// channel is closed once all queries have finished.
+//
+// When start or end is missing, or their counts differ, the status is 400
+// and the channel delivers a single errbadreq reply before being closed.
+//
+// Nothing is ever sent on the channel from the calling goroutine: the
+// channel is unbuffered and its receiver only starts reading after this
+// function has returned, so a direct send here would block forever.
+func (fsa *fsarchive) GetImpl(values url.Values, ar archive) (int, chan reply) {
+	retc := make(chan reply)
+	timeAstrs, ok1 := values["start"]
+	timeBstrs, ok2 := values["end"]
+	if !ok1 || !ok2 || len(timeAstrs) != len(timeBstrs) {
+		go func() {
+			defer close(retc) //let the range in the responsewriter finish
+			retc <- reply{data: nil, err: errbadreq}
+		}()
+		return http.StatusBadRequest, retc
+	}
+	var grwg sync.WaitGroup
+	for i := 0; i < len(timeAstrs); i++ {
+		log.Printf("timeAstr:%s timeBstr:%s", timeAstrs[i], timeBstrs[i])
+		// keep the two errors apart so a bad start date is not masked by a
+		// good end date
+		timeA, errA := time.Parse("200601021504", timeAstrs[i])
+		timeB, errB := time.Parse("200601021504", timeBstrs[i])
+		if errA != nil || errB != nil || timeB.Before(timeA) {
+			grwg.Add(1)
+			go func() {
+				defer grwg.Done()
+				retc <- reply{data: nil, err: errbaddate}
+			}()
+			continue
+		}
+		ar.Query(timeA, timeB, retc, &grwg) //this will fire a new goroutine
+	}
+	// the last goroutine that will wait for all we invoked and close the chan
+	go func(wg *sync.WaitGroup) {
+		wg.Wait()   //wait for all the goroutines to finish sending
+		close(retc) //close the chan so that range in responsewriter will finish
+		log.Printf("closing the chan\n")
+	}(&grwg)
+	return http.StatusOK, retc
+}
+
+// Get serves a GET request by running the shared GetImpl with this MRT
+// archive as the query backend.
+func (ma *mrtarchive) Get(values url.Values) (int, chan reply) {
+	code, retc := ma.GetImpl(values, ma)
+	return code, retc
+}
+
+// Get serves a GET request by running the shared GetImpl with this XML
+// archive as the query backend.
+func (fsa *xmlarchive) Get(values url.Values) (int, chan reply) {
+	code, retc := fsa.GetImpl(values, fsa)
+	return code, retc
+}
+
+// Query starts a goroutine that scans the MRT files whose start date can
+// overlap [ta, tb] and reports problems on retc: errempty when the archive
+// has no files, errdate when the range lies outside the archive, and the
+// header error for every MRT record whose header cannot be decoded.
+//
+// wg is incremented before the goroutine starts and released when it ends,
+// so the caller can wait on wg before closing retc.
+//
+// NOTE(review): the records themselves are only logged for now; nothing is
+// sent on retc for a successfully decoded record.
+func (ma *mrtarchive) Query(ta, tb time.Time, retc chan reply, wg *sync.WaitGroup) {
+	log.Printf("querying mrt from %s to %s\n", ta, tb)
+	// Add must happen before the goroutine is started: done inside it, the
+	// caller's wg.Wait() could run first, see a zero counter and close retc
+	// while this query is still about to send on it.
+	wg.Add(1)
+	go func(rc chan<- reply) {
+		defer wg.Done()
+		ef := *ma.entryfiles
+		if len(ef) == 0 {
+			rc <- reply{nil, errempty}
+			return
+		}
+		if tb.Before(ef[0].sdate) || ta.After(ef[len(ef)-1].sdate.Add(ma.timedelta)) {
+			rc <- reply{nil, errdate}
+			return
+		}
+		// first file that may still contain ta, widened by timedelta
+		i := sort.Search(len(ef), func(i int) bool {
+			return ef[i].sdate.After(ta.Add(-ma.timedelta - time.Second))
+		})
+		// first file that starts after tb
+		j := sort.Search(len(ef), func(i int) bool {
+			return ef[i].sdate.After(tb)
+		})
+		for k := i; k < j; k++ {
+			file, ferr := os.Open(ef[k].path)
+			if ferr != nil {
+				log.Println("failed opening file: ", ef[k].path, " ", ferr)
+				continue
+			}
+			var scanner *bufio.Scanner
+			if filepath.Ext(ef[k].path) == ".bz2" {
+				log.Printf("bunzip2 file. opening decompression stream\n")
+				scanner = bufio.NewScanner(bzip2.NewReader(file))
+			} else {
+				log.Printf("no extension on file: %s. opening normally\n", ef[k].path)
+				scanner = bufio.NewScanner(file)
+			}
+			scanner.Split(mrt.SplitMrt)
+			startt := time.Now()
+			for scanner.Scan() {
+				data := scanner.Bytes()
+				// never slice past the token when it is shorter than a header
+				if len(data) < int(mrt.MrtHdr_size) {
+					log.Printf("short mrt record of %d bytes. ignoring", len(data))
+					continue
+				}
+				hdr, errh := mrt.NewMrtHdr(data[:mrt.MrtHdr_size])
+				if errh != nil {
+					log.Printf("error in creating MRT header:%s", errh)
+					rc <- reply{data: nil, err: errh}
+					continue
+				}
+				date := time.Unix(int64(hdr.Mrt_timestamp), 0)
+				log.Printf("scanned mrt with date:%s", date)
+			}
+			if err := scanner.Err(); err != nil && err != io.EOF {
+				log.Printf("file scanner error:%s\n", err)
+			}
+			log.Printf("finished parsing file %s size %d in %s\n", ef[k].path, ef[k].sz, time.Since(startt))
+			file.Close()
+		}
+	}(retc)
+}
+
+// visit is the filepath.WalkFunc used while scanning an MRT archive.
+//
+// A regular file is added to tempentryfiles when its path contains the
+// archive's descriminator and its name looks like foo.YYYYMMDD.HHMM.bz2; the
+// date in the name becomes the entry's start date. Everything else is
+// logged and skipped. visit always returns nil so that one bad entry never
+// aborts the walk.
+func (fsa *mrtarchive) visit(path string, f os.FileInfo, err error) error {
+	if err != nil {
+		// f can be nil when the walk could not stat the path, so it must
+		// not be touched here.
+		log.Printf("visit: error accessing path:%s : %s . ignoring\n", path, err)
+		return nil
+	}
+	fname := f.Name()
+	log.Print("examining mrt: ", fname)
+	if strings.LastIndex(path, fsa.descriminator) == -1 {
+		log.Printf("visit: descriminator:%s not found in path:%s . ignoring\n", fsa.descriminator, path)
+		return nil
+	}
+	if !f.Mode().IsRegular() {
+		return nil
+	}
+	numind := strings.IndexFunc(fname, unicode.IsDigit)
+	extind := strings.LastIndex(fname, ".bz2")
+	// "YYYYMMDD.HHMM" is 13 bytes long
+	if numind == -1 || extind == -1 || extind-numind != 13 {
+		log.Printf("file: %s not in foo.YYYYMMDD.HHMM.bz2... format. extind:%d numberind:%d", fname, extind, numind)
+		return nil
+	}
+	datestr := fname[numind:extind]
+	log.Println("datestr in filename is ", datestr)
+	sdate, errtime := time.Parse("20060102.1504", datestr)
+	if errtime != nil {
+		log.Print("time.Parse() failed on file: ", fname, " that should be in fooHHMM format with error: ", errtime)
+		return nil
+	}
+	fsa.tempentryfiles = append(fsa.tempentryfiles, archentryfile{path: path, sdate: sdate, sz: f.Size()})
+	return nil
+}
+
+// Query starts a goroutine that scans the XML files whose start date can
+// overlap [ta, tb] and sends on retc every line whose <DATETIME> element
+// (RFC 3339) lies strictly between ta and tb. It replies with errempty when
+// the archive has no files and errdate when the range lies outside it.
+// Scanning of a file stops at the first message dated after tb.
+//
+// wg is incremented before the goroutine starts and released when it ends,
+// so the caller can wait on wg before closing retc.
+func (fsa *xmlarchive) Query(ta, tb time.Time, retc chan reply, wg *sync.WaitGroup) {
+	log.Printf("querying from %s to %s\n", ta, tb)
+	// Add must happen before the goroutine is started: done inside it, the
+	// caller's wg.Wait() could run first, see a zero counter and close retc
+	// while this query is still about to send on it.
+	wg.Add(1)
+	go func(rc chan<- reply) {
+		defer wg.Done()
+		ef := *fsa.entryfiles
+		if len(ef) == 0 {
+			rc <- reply{nil, errempty}
+			return
+		}
+		if tb.Before(ef[0].sdate) || ta.After(ef[len(ef)-1].sdate.Add(fsa.timedelta)) {
+			rc <- reply{nil, errdate}
+			return
+		}
+		// first file that may still contain ta, widened by timedelta
+		i := sort.Search(len(ef), func(i int) bool {
+			return ef[i].sdate.After(ta.Add(-fsa.timedelta - time.Second))
+		})
+		// first file that starts after tb
+		j := sort.Search(len(ef), func(i int) bool {
+			return ef[i].sdate.After(tb)
+		})
+		for k := i; k < j; k++ {
+			fext := filepath.Ext(ef[k].path)
+			file, ferr := os.Open(ef[k].path)
+			if ferr != nil {
+				log.Println("failed opening file: ", ef[k].path, " ", ferr)
+				continue
+			}
+			var scanner *bufio.Scanner
+			switch fext {
+			case "", ".xml":
+				log.Printf("no extension on file: %s. opening normally\n", ef[k].path)
+				scanner = bufio.NewScanner(file)
+			case ".bz2":
+				log.Printf("bunzip2 file. opening decompression stream\n")
+				scanner = bufio.NewScanner(bzip2.NewReader(file))
+			default:
+				// report the file being examined (index k; j may be one past
+				// the end of ef) and release it before moving on
+				log.Printf("unhandled file extension: %s\n", ef[k].path)
+				file.Close()
+				continue
+			}
+			startt := time.Now()
+			for scanner.Scan() {
+				str := scanner.Text()
+				dateindi := strings.Index(str, "<DATETIME>")
+				if dateindi == -1 {
+					log.Println("could not locate DATETIME string in xml msg: ", str)
+					continue
+				}
+				dateindi = dateindi + 10 // go to start of date data
+				dateindj := strings.Index(str[dateindi:], "</DATETIME>")
+				if dateindj == -1 {
+					log.Println("could not locate closing </DATETIME> string in xml msg: ", str)
+					continue
+				}
+				dateindj = dateindj + dateindi // to return it to the relative start of line pos
+				xmldate, derr := time.Parse(time.RFC3339, str[dateindi:dateindj])
+				if derr != nil {
+					log.Printf("could not parse datetime: %s\n", derr)
+					continue
+				}
+				if xmldate.After(ta) && xmldate.Before(tb) {
+					rc <- reply{data: []byte(fmt.Sprintf("%s\n", str)), err: nil}
+				} else if xmldate.After(tb) { //only later measurements in this file. leaving
+					break
+				}
+			}
+			if err := scanner.Err(); err != nil && err != io.EOF {
+				log.Printf("file scanner error:%s\n", err)
+			}
+			log.Printf("finished parsing file %s size %d in %s\n", ef[k].path, ef[k].sz, time.Since(startt))
+			file.Close()
+		}
+	}(retc)
+}
+
+// NewMRTArchive returns an MRT archive rooted at path that only considers
+// files whose path contains descr.
+func NewMRTArchive(path, descr string) *mrtarchive {
+	base := NewFsArchive(path, descr)
+	return &mrtarchive{fsarchive: base}
+}
+
+// NewFsArchive returns an empty file-system archive rooted at path that
+// only considers files whose path contains descr. The entry lists start
+// empty, no scan is in progress, and timedelta is 15 minutes.
+func NewFsArchive(path, descr string) *fsarchive {
+	// fields not assigned below (curyr, curmon, curday, scanning) keep
+	// their zero values
+	ar := new(fsarchive)
+	ar.rootpathstr = path
+	ar.descriminator = descr
+	ar.entryfiles = &timeentryslice{}
+	ar.tempentryfiles = timeentryslice{}
+	ar.reqchan = make(chan string)
+	ar.scanch = make(chan struct{})
+	ar.Scanwg = new(sync.WaitGroup)
+	ar.timedelta = 15 * time.Minute
+	ar.conf = new(fsarconf)
+	return ar
+}
+
+// NewXmlArchive returns an XML archive rooted at path that only considers
+// files whose path contains descr.
+func NewXmlArchive(path, descr string) *xmlarchive {
+	base := NewFsArchive(path, descr)
+	return &xmlarchive{fsarchive: base}
+}
+
+//trying to see if a dir name is in YYYY.MM form
+//returns true, year, month if it is, or false, 0, 0 if not.
+func isYearMonthDir(fname string) (res bool, yr int, mon int) {
+	var err error
+	res = false
+	yr = 0
+	mon = 0
+	isdot := func(r rune) bool {
+		if r == '.' {
+			return true
+		}
+		return false
+	}
+	ind := strings.IndexFunc(fname, isdot)
+	//not found or in the form foo.
+	if ind == -1 || ind == len(fname) {
+		return
+	}
+	//not YYYY or MM
+	if len(fname[:ind]) != 4 || len(fname[ind+1:]) != 2 {
+		return
+	}
+	yr, err = strconv.Atoi(fname[:ind])
+	if err != nil {
+		return
+	}
+	mon, err = strconv.Atoi(fname[ind+1:])
+	if err != nil {
+		return
+	}
+	if mon < 1 || mon > 12 {
+		return
+	}
+	//the values were found to be valid
+	res = true
+	return
+}
+
+// visit is the filepath.WalkFunc used while scanning an XML archive.
+//
+// A regular file is added to tempentryfiles when its path contains the
+// archive's descriminator and its name looks like foo.YYYYMMDD.HHMM.xml; the
+// date in the name becomes the entry's start date. Everything else is
+// logged and skipped. visit always returns nil so that one bad entry never
+// aborts the walk.
+func (fsa *xmlarchive) visit(path string, f os.FileInfo, err error) error {
+	if err != nil {
+		// f can be nil when the walk could not stat the path, so it must
+		// not be touched here.
+		log.Printf("visit: error accessing path:%s : %s . ignoring\n", path, err)
+		return nil
+	}
+	fname := f.Name()
+	log.Print("examining ", fname)
+	if strings.LastIndex(path, fsa.descriminator) == -1 {
+		log.Printf("visit: descriminator:%s not found in path:%s . ignoring\n", fsa.descriminator, path)
+		return nil
+	}
+	if !f.Mode().IsRegular() {
+		return nil
+	}
+	numind := strings.IndexFunc(fname, unicode.IsDigit)
+	xmlind := strings.LastIndex(fname, ".xml")
+	// "YYYYMMDD.HHMM" is 13 bytes long
+	if numind == -1 || xmlind == -1 || xmlind-numind != 13 {
+		log.Print("file: ", fname, " not in foo.YYYYMMDD.HHMM.xml... format")
+		return nil
+	}
+	datestr := fname[numind:xmlind]
+	log.Println("datestr in filename is ", datestr)
+	sdate, errtime := time.Parse("20060102.1504", datestr)
+	if errtime != nil {
+		log.Print("time.Parse() failed on file: ", fname, " that should be in fooHHMM format with error: ", errtime)
+		return nil
+	}
+	fsa.tempentryfiles = append(fsa.tempentryfiles, archentryfile{path: path, sdate: sdate, sz: f.Size()})
+	return nil
+}
+
+// printEntries writes the path and start date of every published entry
+// file to standard output, one per line.
+func (fsa *fsarchive) printEntries() {
+	log.Printf("dumping entries")
+	entries := *fsa.entryfiles
+	for i := range entries {
+		fmt.Printf("%s %s\n", entries[i].path, entries[i].sdate)
+	}
+}
+
+// scan walks the archive root, collecting entry files into tempentryfiles
+// via ar.visit, sorts them by start date and then signals the serve
+// goroutine on scanch.
+//
+// It is started by Serve, which has already set fsa.scanning and called
+// fsa.Scanwg.Add(1) on its behalf, so that the bookkeeping is in place
+// before this goroutine is even scheduled.
+func (fsa *fsarchive) scan(ar archive) {
+	//clear the temp slice
+	fsa.tempentryfiles = []archentryfile{}
+	if err := filepath.Walk(fsa.rootpathstr, ar.visit); err != nil {
+		log.Printf("fsarchive: walking %s: %s\n", fsa.rootpathstr, err)
+	}
+	sort.Sort(fsa.tempentryfiles)
+	//allow the serve goroutine to unblock in case of STOP.
+	fsa.Scanwg.Done()
+	//signal the serve goroutine on scandone channel
+	fsa.scanch <- struct{}{}
+}
+
+// Serve starts the goroutine that owns the archive and returns the channel
+// on which it accepts commands:
+//
+//	"SCAN"         rescan the root directory (ignored while a scan runs)
+//	"DUMPENTRIES"  print the published entries
+//	"STOP"         wait for a running scan, publish its result and exit
+//
+// Closing the channel has the same effect as "STOP". wg is released when
+// the goroutine exits. After a stop, Serve may be called again and will
+// return a fresh channel.
+//
+// NOTE(review): Scanwg is incremented by the serve goroutine when it handles
+// "SCAN", so a caller that sends "SCAN" and immediately waits on Scanwg can
+// still get there first and return before the scan has started.
+func (fsa *fsarchive) Serve(wg *sync.WaitGroup, ar archive) (reqchan chan<- string) {
+	if fsa.reqchan == nil { // we have closed the channel and now called again
+		fsa.reqchan = make(chan string)
+	}
+	// the goroutine and the caller must use the very same channel, even
+	// after fsa.reqchan is reset on stop
+	reqc := fsa.reqchan
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		// publish makes the result of a finished scan visible to queries.
+		publish := func() {
+			// Snapshot the slice header. Pointing entryfiles at the
+			// tempentryfiles field itself would let the next scan empty
+			// and refill the very list that queries are reading.
+			files := fsa.tempentryfiles
+			fsa.entryfiles = &files
+			fsa.scanning = false
+			//let the config know
+			log.Printf("setting conf arfiles from :%v to a slice of len: %v\n", fsa.conf.arfiles, len(*fsa.entryfiles))
+			fsa.conf.arfiles = fsa.entryfiles
+			log.Print("fsarchive: scan finished")
+		}
+		// shutdown waits for a scan in flight and collects its completion
+		// signal; otherwise the scan goroutine would stay blocked on scanch
+		// forever once nobody is receiving from it.
+		shutdown := func() {
+			fsa.Scanwg.Wait()
+			if fsa.scanning {
+				<-fsa.scanch
+				publish()
+			}
+			fsa.reqchan = nil //no more stuff from this channel
+		}
+		for {
+			select {
+			case req, ok := <-reqc:
+				if !ok {
+					// A closed channel is always ready and yields "", so
+					// without this check the loop would spin forever.
+					log.Print("fsar: request channel closed. stopping")
+					shutdown()
+					return
+				}
+				switch req {
+				case "SCAN":
+					if fsa.scanning {
+						log.Print("fsarchive: already scanning. ignoring command")
+					} else { //fire an async goroutine to scan the files and wait for SCANDONE
+						// do the bookkeeping here, before the goroutine
+						// exists, so that a second SCAN or a STOP right
+						// behind this one already sees the scan
+						fsa.scanning = true
+						fsa.Scanwg.Add(1)
+						go fsa.scan(ar)
+					}
+				case "DUMPENTRIES":
+					if fsa.scanning {
+						log.Print("fsar: warning. scanning in progress")
+					}
+					fsa.printEntries()
+				case "STOP":
+					log.Print("fsar: stopping")
+					shutdown()
+					return
+				default:
+					log.Print("fsarchive: unknown request: ", req)
+				}
+			case <-fsa.scanch:
+				//update the reference to our file slice
+				publish()
+			}
+		}
+	}()
+	return reqc
+}
diff --git a/bgp.go b/bgp.go
@@ -0,0 +1 @@
+package bgp
diff --git a/cmd/archive_server.go b/cmd/archive_server.go
@@ -0,0 +1,51 @@
+package main
+
+import (
+	ar "go-bgp/archive"
+	"log"
+	"os"
+	"sync"
+)
+
+// main scans the directory given on the command line for MRT RIB dumps and
+// serves them over HTTP on port 3000 under /archive/mrt/ribs.
+func main() {
+	if len(os.Args) != 2 {
+		log.Fatal("usage: ", os.Args[0], " directory ")
+	}
+	basedirstr := os.Args[1]
+	ribmrtar := ar.NewMRTArchive(basedirstr, "RIBS")
+	wg1 := &sync.WaitGroup{}
+	mrtreqc := ribmrtar.Serve(wg1, ribmrtar)
+	mrtreqc <- "SCAN"
+	// NOTE(review): the scan registers itself with Scanwg asynchronously, so
+	// this wait may return before the scan has actually started.
+	ribmrtar.Scanwg.Wait()
+	api := new(ar.API)
+	api.AddResource(ribmrtar, "/archive/mrt/ribs")
+	// Start blocks while the HTTP server is running.
+	api.Start(3000)
+	// Ask the serve goroutine to exit. Just closing the channel does not
+	// stop it, which would leave wg1.Wait() below hanging forever.
+	mrtreqc <- "STOP"
+	wg1.Wait()
+	/*
+		updfsar := NewXmlArchive(basedirstr, "UPDATES")
+		//ribfsar := NewFsArchive(basedirstr, "RIBS")
+		wg2	:=  &sync.WaitGroup{}
+		updreqc := updfsar.serve(wg2, updfsar)
+		//ribreqc := updfsar.serve(wg2)
+		updreqc <- "SCAN"
+		updfsar.scanwg.Wait()
+		//ribfsar.scanwg.Wait()
+		//time.Sleep(time.Second*2)
+		updreqc <- "DUMPENTRIES"
+		api := new(API)
+		api.AddResource(updfsar, "/archive/updates")
+		//api.AddResource(ribfsar, "/archive/ribs")
+		api.AddResource(updfsar.conf, "/archive/updates/conf")
+		//api.AddResource(ribfsar.conf, "/archive/ribs/conf")
+		api.Start(3000)
+		//reqc<-"STOP"
+
+		close(updreqc)
+		//close(ribreqc)
+		//wait for it
+		//wg1.Wait()
+		wg2.Wait()
+	*/
+	log.Print("all fsarchives stopped. exiting")
+}
diff --git a/doc/draft-ietf-grow-mrt-11.txt b/doc/draft-ietf-grow-mrt-11.txt
@@ -0,0 +1,1625 @@
+
+
+
+Network Working Group                                           L. Blunk
+Internet-Draft                                                  M. Karir
+Intended status: Standards Track                           Merit Network
+Expires: September 9, 2010                                   C. Labovitz
+                                                          Arbor Networks
+                                                           March 8, 2010
+
+
+                 MRT routing information export format
+                       draft-ietf-grow-mrt-11.txt
+
+Abstract
+
+   This document describes the MRT format for routing information
+   export.  This format was developed in concert with the Multi-threaded
+   Routing Toolkit (MRT) from whence the format takes it name.  The
+   format can be used to export routing protocol messages, state
+   changes, and routing information base contents.
+
+Status of this Memo
+
+   This Internet-Draft is submitted to IETF in full conformance with the
+   provisions of BCP 78 and BCP 79.
+
+   Internet-Drafts are working documents of the Internet Engineering
+   Task Force (IETF), its areas, and its working groups.  Note that
+   other groups may also distribute working documents as Internet-
+   Drafts.
+
+   Internet-Drafts are draft documents valid for a maximum of six months
+   and may be updated, replaced, or obsoleted by other documents at any
+   time.  It is inappropriate to use Internet-Drafts as reference
+   material or to cite them other than as "work in progress."
+
+   The list of current Internet-Drafts can be accessed at
+   http://www.ietf.org/ietf/1id-abstracts.txt.
+
+   The list of Internet-Draft Shadow Directories can be accessed at
+   http://www.ietf.org/shadow.html.
+
+   This Internet-Draft will expire on September 9, 2010.
+
+Copyright Notice
+
+   Copyright (c) 2010 IETF Trust and the persons identified as the
+   document authors.  All rights reserved.
+
+   This document is subject to BCP 78 and the IETF Trust's Legal
+
+
+
+Blunk, et al.           Expires September 9, 2010               [Page 1]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+   Provisions Relating to IETF Documents
+   (http://trustee.ietf.org/license-info) in effect on the date of
+   publication of this document.  Please review these documents
+   carefully, as they describe your rights and restrictions with respect
+   to this document.  Code Components extracted from this document must
+   include Simplified BSD License text as described in Section 4.e of
+   the Trust Legal Provisions and are provided without warranty as
+   described in the BSD License.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010               [Page 2]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+Table of Contents
+
+   1.  Requirements notation  . . . . . . . . . . . . . . . . . . . .  4
+   2.  Introduction . . . . . . . . . . . . . . . . . . . . . . . . .  5
+   3.  Basic MRT Format . . . . . . . . . . . . . . . . . . . . . . .  6
+   4.  MRT Informational Types  . . . . . . . . . . . . . . . . . . .  8
+     4.1.  START Type . . . . . . . . . . . . . . . . . . . . . . . .  8
+     4.2.  I_AM_DEAD Type . . . . . . . . . . . . . . . . . . . . . .  8
+   5.  MRT Routing Information Types  . . . . . . . . . . . . . . . .  9
+     5.1.  OSPF Type  . . . . . . . . . . . . . . . . . . . . . . . .  9
+     5.2.  TABLE_DUMP Type  . . . . . . . . . . . . . . . . . . . . . 10
+     5.3.  TABLE_DUMP_V2 Type . . . . . . . . . . . . . . . . . . . . 11
+     5.4.  BGP4MP Type  . . . . . . . . . . . . . . . . . . . . . . . 14
+       5.4.1.  BGP4MP_STATE_CHANGE Subtype  . . . . . . . . . . . . . 14
+       5.4.2.  BGP4MP_MESSAGE Subtype . . . . . . . . . . . . . . . . 15
+       5.4.3.  BGP4MP_MESSAGE_AS4 Subtype . . . . . . . . . . . . . . 16
+       5.4.4.  BGP4MP_STATE_CHANGE_AS4 Subtype  . . . . . . . . . . . 16
+       5.4.5.  BGP4MP_MESSAGE_LOCAL Subtype . . . . . . . . . . . . . 17
+       5.4.6.  BGP4MP_MESSAGE_AS4_LOCAL Subtype . . . . . . . . . . . 17
+     5.5.  BGP4MP_ET Type . . . . . . . . . . . . . . . . . . . . . . 17
+     5.6.  ISIS Type  . . . . . . . . . . . . . . . . . . . . . . . . 18
+     5.7.  ISIS_ET Type . . . . . . . . . . . . . . . . . . . . . . . 18
+     5.8.  OSPFv3 Type  . . . . . . . . . . . . . . . . . . . . . . . 18
+     5.9.  OSPFv3_ET Type . . . . . . . . . . . . . . . . . . . . . . 19
+   6.  IANA Considerations  . . . . . . . . . . . . . . . . . . . . . 20
+     6.1.  Type Codes . . . . . . . . . . . . . . . . . . . . . . . . 20
+     6.2.  Subtype Codes  . . . . . . . . . . . . . . . . . . . . . . 20
+   7.  Security Considerations  . . . . . . . . . . . . . . . . . . . 21
+   8.  References . . . . . . . . . . . . . . . . . . . . . . . . . . 22
+     8.1.  Normative References . . . . . . . . . . . . . . . . . . . 22
+     8.2.  Informative References . . . . . . . . . . . . . . . . . . 22
+   Appendix A.  Deprecated MRT types  . . . . . . . . . . . . . . . . 23
+     A.1.  Deprecated MRT Informational Types . . . . . . . . . . . . 23
+       A.1.1.  NULL Type  . . . . . . . . . . . . . . . . . . . . . . 23
+       A.1.2.  DIE Type . . . . . . . . . . . . . . . . . . . . . . . 23
+       A.1.3.  PEER_DOWN Type . . . . . . . . . . . . . . . . . . . . 23
+     A.2.  Deprecated MRT Routing Information Types . . . . . . . . . 23
+       A.2.1.  BGP Type . . . . . . . . . . . . . . . . . . . . . . . 23
+       A.2.2.  RIP Type . . . . . . . . . . . . . . . . . . . . . . . 26
+       A.2.3.  IDRP Type  . . . . . . . . . . . . . . . . . . . . . . 26
+       A.2.4.  RIPNG Type . . . . . . . . . . . . . . . . . . . . . . 26
+       A.2.5.  BGP4PLUS and BGP4PLUS_01 Types . . . . . . . . . . . . 27
+       A.2.6.  Deprecated BGP4MP Subtypes . . . . . . . . . . . . . . 27
+   Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . 29
+
+
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010               [Page 3]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+1.  Requirements notation
+
+   The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+   "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+   document are to be interpreted as described in [RFC2119].
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010               [Page 4]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+2.  Introduction
+
+   Researchers and engineers often wish to analyze network behavior by
+   studying routing protocol transactions and routing information base
+   snapshots.  To this end, the MRT format was developed to encapsulate,
+   export, and archive this information in a standardized data
+   representation.  The BGP routing protocol, in particular, has been
+   the subject of extensive study and analysis which has been
+   significantly aided by the availability of the MRT format.  The MRT
+   format was initially defined in the MRT Programmer's Guide [MRT PROG
+   GUIDE].
+
+   This memo serves to document the MRT format as currently implemented
+   in publicly available software.  The format has been extended since
+   it's original introduction in the MRT toolset and these extensions
+   are also included in this memo.  Further extensions may be introduced
+   at a later date through additional definitions of the MRT Type field
+   and Subtype fields.
+
+   A number of MRT message types have been documented in some references
+   but are not known to have been implemented.  Further, several types
+   were employed in early MRT implementations, but are no longer
+   actively being used.  These types are considered to be deprecated and
+   are documented in a separate appendix at the end of this document.
+   Some of the deprecated types may of interest to researchers examining
+   historical MRT archives.
+
+   Fields which contain multi-octet numeric values are encoded in
+   network octet order from most significant octet to least significant
+   octet.  Fields which contain routing message fields are encoded in
+   the same order as they appear in the packet contents.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010               [Page 5]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+3.  Basic MRT Format
+
+   All MRT format messages have a common header which includes a
+   timestamp, Type, Subtype, and length field.  The header is followed
+   by a message field.  The MRT common header is illustrated below.
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                           Timestamp                           |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |             Type              |            Subtype            |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                             Length                            |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      Message... (variable)
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   Header Field Descriptions:
+
+
+      Timestamp:
+
+         Time in seconds since 1 January 1970 00:00:00 UTC
+
+
+      Type:
+
+         A 2-octet field that indicates the Type of information
+         contained in the message field.  Types 0 through 4 are
+         informational messages pertaining to the state of an MRT
+         collector, while Types 5 and higher are used to convey routing
+         information.
+
+
+      Subtype:
+
+         A 2-octet field that is used to further distinguish message
+         information within a particular message Type.
+
+
+      Length:
+
+         A 4-octet message length field.  The length field contains the
+         number of octets within the message.  The length field does not
+         include the length of the MRT common header.
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010               [Page 6]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+
+      Message:
+
+         A variable length message.  The contents of this field are
+         context dependent upon the Type and Subtype fields.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010               [Page 7]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+4.  MRT Informational Types
+
+   The MRT format defines five Informational Type messages.  These
+   messages are intended to signal the state of an MRT data collector
+   and do not contain routing information.  These messages are OPTIONAL
+   and were largely intended for use when MRT messages are sent over a
+   network to a remote repository store.  However, MRT message
+   repository stores have traditionally resided on the same device as
+   the collector and these Informational Types have seen limited
+   implementation.  Further, transport mechanisms for MRT messages are
+   considered to be outside the scope of this document.
+
+   The START and I_AM_DEAD messages MAY be used to provide a time
+   reference when a data collector begins and ends the collection
+   process.  The time reference is obtained from the Timestamp field in
+   the MRT message header.
+
+   The message field MAY contain an OPTIONAL message string for
+   diagnostic purposes.  The message string encoding MUST follow the
+   UTF-8 transformation format.  The Subtype field is unused for these
+   Types and SHOULD be set to 0.
+
+   The MRT Informational Types are defined below:
+
+       1    START
+       3    I_AM_DEAD
+
+4.1.  START Type
+
+   The START Type indicates a collector is about to begin generating MRT
+   messages.
+
+4.2.  I_AM_DEAD Type
+
+   An I_AM_DEAD MRT message indicates that a collector has shut down and
+   has stopped generating MRT messages.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010               [Page 8]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+5.  MRT Routing Information Types
+
+   The following Types are currently defined for the MRT format.  Types
+   11 and 12 were defined in the MRT Toolkit package.  The BGP4MP Type,
+   number 16, was initially defined in the Zebra routing software
+   package.  The BGP4MP_ET, ISIS, and ISIS_ET Types were initially
+   defined in the Sprint Labs Python Routing Toolkit (PyRT).  The OSPFv3
+   and OSPFv3_ET Types are newly defined types created for the OSPFv3
+   routing protocol.
+
+       11   OSPF
+       12   TABLE_DUMP
+       13   TABLE_DUMP_V2
+       16   BGP4MP
+       17   BGP4MP_ET
+       32   ISIS
+       33   ISIS_ET
+       48   OSPFv3
+       49   OSPFv3_ET
+
+5.1.  OSPF Type
+
+   This Type supports the OSPF Protocol as defined in RFC 2328
+   [RFC2328].  The Subtype field may contain two possible values:
+
+       0    OSPF_STATE_CHANGE
+       1    OSPF_LSA_UPDATE
+
+   The format of the MRT Message field for the OSPF Type is as follows:
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         Remote IP address                     |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         Local IP address                      |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                  OSPF Message Contents (variable)
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+
+
+
+
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010               [Page 9]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+5.2.  TABLE_DUMP Type
+
+   The TABLE_DUMP Type is used to encode the contents of a BGP Routing
+   Information Base (RIB).  Each RIB entry is encoded in a distinct
+   sequential MRT record.  The Subtype field is used to encode whether
+   the RIB entry contains IPv4 or IPv6 addresses.  There are two
+   possible values for the Subtype as shown below.
+
+       1    AFI_IPv4
+       2    AFI_IPv6
+
+   The format of the TABLE_DUMP Type is illustrated below.
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |           View #              |       Sequence number         |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                        Prefix (variable)                      |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       | Prefix Length |    Status     |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         Originated Time                       |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                    Peer IP address (variable)                 |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |           Peer AS             |       Attribute Length        |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                   BGP Attribute... (variable)
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   The View field is normally 0 and is intended for cases where an
+   implementation may have multiple RIB views (such as a route server).
+   In cases where multiple RIB views are present, an implementation may
+   use the view field to distinguish entries from each view.  The
+   Sequence field is a simple incremental counter for each RIB entry.  A
+   typical RIB dump will exceed the 16-bit bounds of this counter and
+   implementations should simply wrap back to zero and continue
+   incrementing the counter in such cases.
+
+   The Prefix field contains the IP address of a particular RIB entry.
+   The size of this field is dependent on the value of the Subtype for
+   this message.  For AFI_IPv4, this field is 4 octets, for AFI_IPv6, it
+   is 16 octets in length.  The Prefix Length field indicates the length
+   in bits of the prefix mask for the preceding Prefix field.
+
+   The Status octet is not used in the TABLE_DUMP Type and SHOULD be set
+   to 1.
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 10]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+   The Originated Time contains the 4-octet time at which this prefix
+   was heard.  The value represents the time in seconds since 1 January
+   1970 00:00:00 UTC.
+
+   The Peer IP field is the IP address of the peer which provided the
+   update for this RIB entry.  As with the Prefix field, the size of
+   this field is dependent on the Subtype.  AFI_IPv4 indicates a 4 octet
+   field and an IPv4 address, while a Subtype of AFI_IPv6 requires a 16
+   octet field and an IPv6 address.  The Peer AS field contains the AS
+   number of the peer.
+
+   Attribute Length is the length of the Attribute field and is 2
+   octets.  The Attribute field contains the attribute information for
+   the RIB entry.
+
+5.3.  TABLE_DUMP_V2 Type
+
+   The TABLE_DUMP_V2 Type updates the TABLE_DUMP Type to include 4-Byte
+   ASN support and full support for BGP Multiprotocol extensions.  It
+   also improves upon the space efficiency of the TABLE_DUMP Type by
+   employing an index table for peers and permitting a single MRT record
+   per NLRI entry.  The following subtypes are used with the
+   TABLE_DUMP_V2 Type.
+
+       1    PEER_INDEX_TABLE
+       2    RIB_IPV4_UNICAST
+       3    RIB_IPV4_MULTICAST
+       4    RIB_IPV6_UNICAST
+       5    RIB_IPV6_MULTICAST
+       6    RIB_GENERIC
+
+   An initial PEER_INDEX_TABLE MRT record provides the BGP ID of the
+   collector, an optional view name, and a list of indexed peers.
+   Following the PEER_INDEX_TABLE MRT record, a series of MRT records
+   are used to encode RIB table entries.  This series of MRT records use
+   subtypes 2-6 and are separate from the PEER_INDEX_TABLE MRT record
+   itself and include full MRT record headers.  The header of the
+   PEER_INDEX_TABLE Subtype is shown below.  The View Name is optional
+   and, if not present, the View Name Length MUST be set to 0.  The View
+   Name encoding MUST follow the UTF-8 transformation format.
+
+
+
+
+
+
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 11]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      Collector BGP ID                         |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |       View Name Length        |     View Name (variable)      |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |          Peer Count           |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   The format of the peer entries is shown below.  The PEER_INDEX_TABLE
+   record contains Peer Count peer entries.
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |   Peer Type   |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         Peer BGP ID                           |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                   Peer IP address (variable)                  |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                        Peer AS (variable)                     |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   The Peer Type, Peer BGP ID, Peer IP, and Peer AS fields are repeated
+   as indicated by the Peer Count field.  The position of the Peer in
+   the PEER_INDEX_TABLE is used as an index in the subsequent
+   TABLE_DUMP_V2 MRT records.  The index number begins with 0.
+
+   The Peer Type field is a bit field which encodes the type of the AS
+   and IP address as follows:
+
+       Bit 0 - unset for IPv4 Peer IP address, set for IPv6
+       Bit 1 - unset when Peer AS is 16 bits, set when it's 32 bits
+
+   The records which follow the PEER_INDEX_TABLE record constitute the
+   RIB entries and include a header which specifies a sequence number,
+   NLRI, and a count of the number of RIB entries which follow.
+
+   The format for the RIB_IPV4_UNICAST, RIB_IPV4_MULTICAST,
+   RIB_IPV6_UNICAST, and RIB_IPV6_MULTICAST headers are shown below.
+   The Prefix Length and Prefix fields are encoded in the same manner as
+   the BGP NLRI encoding for IPV4 and IPV6 prefixes.  Namely, the Prefix
+   field contains address prefixes followed by enough trailing bits to
+   make the end of the field fall on an octet boundary.  Note that the
+   value of trailing bits is irrelevant.
+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 12]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         Sequence number                       |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       | Prefix Length |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                        Prefix (variable)                      |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |         Entry Count           |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   The RIB_GENERIC header is shown below.  It includes Address Family
+   Identifier (AFI), Subsequent AFI and a single NLRI entry.  The NLRI
+   information is specific to the AFI and SAFI values.  An
+   implementation which does not recognize particular AFI and SAFI
+   values SHOULD discard the remainder of the MRT record.
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         Sequence number                       |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |    Address Family Identifier  |Subsequent AFI |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |     Network Layer Reachability Information (variable)         |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |         Entry Count           |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   The RIB entry headers are followed by a series of RIB entries which
+   are repeated Entry Count times.  These entries share a common format
+   as shown below.  They include a Peer Index from the PEER_INDEX_TABLE
+   MRT record, an originated time for the RIB entry, and the BGP path
+   attribute length and attributes encoded as provided in a BGP Update
+   message.
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |         Peer Index            |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         Originated Time                       |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |      Attribute Length         |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                    BGP Attributes... (variable)
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 13]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+   There is one exception to the encoding of BGP attributes for the BGP
+   MP_REACH_NLRI attribute (BGP Type Code 14) [RFC4760].  Since the
+   AFI, SAFI, and NLRI information is already encoded in the
+   MULTIPROTOCOL header, only the Next Hop Address Length and Next Hop
+   Address fields are included.  The Reserved field is omitted.  The
+   attribute length is also adjusted to reflect only the length of the
+   Next Hop Address Length and Next Hop Address fields.
+
+5.4.  BGP4MP Type
+
+   This Type was initially defined in the Zebra software package for the
+   BGP protocol with multiprotocol extension support as defined by RFC
+   4760 [RFC4760].  It supersedes the BGP, BGP4PLUS, BGP4PLUS_01 Types.
+   The BGP4MP Type has six Subtypes which are defined as follows:
+
+       0    BGP4MP_STATE_CHANGE
+       1    BGP4MP_MESSAGE
+       4    BGP4MP_MESSAGE_AS4
+       5    BGP4MP_STATE_CHANGE_AS4
+       6    BGP4MP_MESSAGE_LOCAL
+       7    BGP4MP_MESSAGE_AS4_LOCAL
+
+5.4.1.  BGP4MP_STATE_CHANGE Subtype
+
+   This record is used to encode state changes in the BGP finite state
+   machine.  The BGP FSM states are encoded in the Old State and New
+   State fields to indicate the previous and current state.  In some
+   cases, the Peer AS number may be undefined.  In such cases, the value
+   of this field may be set to zero.  The format is illustrated below:
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |         Peer AS number        |        Local AS number        |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |        Interface Index        |        Address Family         |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      Peer IP address (variable)               |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      Local IP address (variable)              |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |            Old State          |          New State            |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 14]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+   The FSM states are defined in RFC 4271 [RFC4271], Section 8.2.2.
+   Both the old state value and the new state value are encoded as
+   2-octet numbers.  The state values are defined numerically as
+   follows:
+
+       1    Idle
+       2    Connect
+       3    Active
+       4    OpenSent
+       5    OpenConfirm
+       6    Established
+
+   The BGP4MP_STATE_CHANGE message also includes interface index and
+   Address Family fields.  The interface index provides the interface
+   number of the peering session.  The index value is OPTIONAL and MAY
+   be zero if unknown or unsupported.  The Address Family indicates what
+   types of addresses are in the address fields.  At present, the
+   following AFI Types are supported:
+
+       1    AFI_IPv4
+       2    AFI_IPv6
+
+5.4.2.  BGP4MP_MESSAGE Subtype
+
+   This Subtype is used to encode BGP Messages.  It can be used to
+   encode any Type of BGP message.  The entire BGP message is
+   encapsulated in the BGP Message field, including the 16-octet marker,
+   the 2-octet length, and the 1-octet type fields.  Note that the
+   BGP4MP_MESSAGE Subtype does not support 4-Byte AS numbers.  Further,
+   the AS_PATH contained in these messages MUST only consist of 2-Byte
+   AS numbers.  The BGP4MP_MESSAGE_AS4 Subtype updates the
+   BGP4MP_MESSAGE Subtype in order to support 4-Byte AS numbers.  The
+   BGP4MP_MESSAGE fields are shown below:
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |         Peer AS number        |        Local AS number        |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |        Interface Index        |        Address Family         |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      Peer IP address (variable)               |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      Local IP address (variable)              |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                    BGP Message... (variable)
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 15]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+   The interface index provides the interface number of the peering
+   session.  The index value is OPTIONAL and MAY be zero if unknown or
+   unsupported.  The Address Family indicates what types of addresses
+   are in the subsequent address fields.  At present, the following
+   AFI Types are supported:
+
+       1    AFI_IPv4
+       2    AFI_IPv6
+
+   Note that the Address Family value only applies to the IP addresses
+   contained in the MRT header.  The BGP4MP_MESSAGE Subtype is otherwise
+   transparent to the contents of the actual message which may contain
+   any valid AFI/SAFI values.  Only one BGP message may be encoded in
+   the BGP4MP_MESSAGE Subtype.
+
+5.4.3.  BGP4MP_MESSAGE_AS4 Subtype
+
+   This Subtype updates the BGP4MP_MESSAGE Subtype to support 4-Byte
+   Autonomous System numbers.  The BGP4MP_MESSAGE_AS4 Subtype is
+   otherwise identical to the BGP4MP_MESSAGE Subtype.  The AS_PATH in
+   these messages MUST only consist of 4-Byte AS numbers.  The
+   BGP4MP_MESSAGE_AS4 fields are shown below:
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         Peer AS number                        |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         Local AS number                       |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |        Interface Index        |        Address Family         |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      Peer IP address (variable)               |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      Local IP address (variable)              |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                    BGP Message... (variable)
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+5.4.4.  BGP4MP_STATE_CHANGE_AS4 Subtype
+
+   This Subtype updates the BGP4MP_STATE_CHANGE Subtype to support
+   4-Byte Autonomous System numbers.  As with the BGP4MP_STATE_CHANGE
+   Subtype, the BGP FSM states are encoded in the Old State and New
+   State fields to indicate the previous and current state.  Aside from
+   the extension of the peer and local AS fields to 4-Bytes, this
+   subtype is otherwise identical to the BGP4MP_STATE_CHANGE Subtype.
+   The BGP4MP_STATE_CHANGE_AS4 fields are shown below:
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 16]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         Peer AS number                        |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         Local AS number                       |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |        Interface Index        |        Address Family         |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      Peer IP address (variable)               |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      Local IP address (variable)              |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |            Old State          |          New State            |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+5.4.5.  BGP4MP_MESSAGE_LOCAL Subtype
+
+   Implementations of MRT have largely focused on collecting remotely
+   generated BGP messages in a passive route collector role.  However,
+   for active BGP implementations, it can be useful to archive locally
+   generated BGP messages in addition to remote messages.  This subtype
+   is added to indicate a locally generated BGP message.  The fields
+   remain identical to the BGP4MP_MESSAGE type including the Peer and
+   Local IP and AS fields.  The Local fields continue to refer to the
+   local IP and AS number of the collector which generated the message
+   and the Peer IP and AS fields refer to the recipient of the
+   generated BGP messages.
+
+5.4.6.  BGP4MP_MESSAGE_AS4_LOCAL Subtype
+
+   As with the BGP4MP_MESSAGE_LOCAL type, this type indicates locally
+   generated messages.  The fields are identical to the
+   BGP4MP_MESSAGE_AS4 message type.
+
+5.5.  BGP4MP_ET Type
+
+   This Type was initially defined in the Sprint Labs Python Routing
+   Toolkit (PyRT).  It extends the MRT common header field to include a
+   32BIT microsecond timestamp field.  The type and subtype field
+   definitions remain as defined for the BGP4MP Type.  The 32BIT
+   microsecond timestamp immediately follows the length field in the MRT
+   common header and precedes all other fields in the message.  The
+   32BIT microsecond field is included in the computation of the length
+   field value.  The MRT common header modification is illustrated
+   below.
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 17]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                           Timestamp                           |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |             Type              |            Subtype            |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                             Length                            |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      microsecond timestamp                    |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      Message... (variable)
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+5.6.  ISIS Type
+
+   This Type was initially defined in the Sprint Labs Python Routing
+   Toolkit (PyRT) and supports the IS-IS routing protocol as defined in
+   RFC 1195 [RFC1195].  There is no Type specific header for the ISIS
+   Type.  The Subtype code for this Type is undefined.  The ISIS PDU
+   directly follows the MRT common header fields.
+
+5.7.  ISIS_ET Type
+
+   The ISIS_ET Type extends the ISIS Type to support microsecond
+   timestamps.  As with the BGP4MP_ET Type, a 32BIT microsecond
+   timestamp field is appended to the MRT common header after the length
+   field.  The ISIS_ET Type is otherwise identical to the ISIS Type.
+
+5.8.  OSPFv3 Type
+
+   The OSPFv3 Type extends the original OSPF Type to support IPv6
+   addresses for the OSPFv3 protocol as defined in RFC 5340 [RFC5340].
+   The format of the MRT Message field for the OSPFv3 Type is as
+   follows:
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |        Address Family         |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                     Remote IP address (variable)              |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      Local IP address (variable)              |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                  OSPF Message Contents (variable)
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 18]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+5.9.  OSPFv3_ET Type
+
+   The OSPFv3_ET Type extends the OSPFv3 Type to support microsecond
+   timestamps.  As with the BGP4MP_ET Type, a 32BIT microsecond
+   timestamp field is appended to the MRT common header after the length
+   field and its length is included in the calculation of the length
+   field value.  The OSPFv3_ET Type is otherwise identical to the OSPFv3
+   Type.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 19]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+6.  IANA Considerations
+
+   This section provides guidance to the Internet Assigned Numbers
+   Authority (IANA) regarding registration of values related to the MRT
+   specification, in accordance with BCP 26, RFC 5226 [RFC5226].
+
+   There are two name spaces in MRT that require registration: Type
+   Codes and Subtype Codes.
+
+   MRT is not intended as a general-purpose specification for protocol
+   information export, and allocations should not be made for purposes
+   unrelated to routing protocol information export.
+
+   The following policies are used here with the meanings defined in BCP
+   26: "Specification Required", "IETF Consensus", "Experimental Use",
+   "First Come First Served".
+
+6.1.  Type Codes
+
+   Type Codes have a range from 0 to 65535, of which 1-64 have been
+   allocated.  New Type Codes MUST be allocated starting at 65.  Type
+   Codes 65 - 511 are to be assigned by IETF Review.  Type Codes 512 -
+   2047 are assigned based on Specification Required.  Type Codes 2048 -
+   64511 are available on a First Come First Served policy.  Type Codes
+   64512 - 65534 are available for Experimental Use. The Type Code
+   Values of 0 and 65535 are reserved.
+
+6.2.  Subtype Codes
+
+   Subtype Codes have a range from 0 to 65535.  Subtype definitions are
+   specific to a particular Type Code definition.  New Subtype Code
+   definitions must reference an existing Type Code to which the Subtype
+   belongs.  Subtype assignments to Type Codes 0 - 511 are to be
+   assigned by IETF Review.  Subtype assignments for the remaining Type
+   Codes follow the assignment rules for the Type Codes to which they
+   belong.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 20]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+7.  Security Considerations
+
+   The MRT Format utilizes a structure which can store routing protocol
+   information data.  The fields defined in the MRT specification are of
+   a descriptive nature and provide information that is useful to
+   facilitate the analysis of routing data.  As such, the fields
+   currently defined in the MRT specification do not in themselves
+   create additional security risks, since the fields are not used to
+   induce any particular behavior by the recipient application.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 21]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+8.  References
+
+8.1.  Normative References
+
+   [RFC1058]  Hedrick, C., "Routing Information Protocol", RFC 1058,
+              June 1988.
+
+   [RFC1195]  Callon, R., "Use of OSI IS-IS for routing in TCP/IP and
+              dual environments", RFC 1195, December 1990.
+
+   [RFC2080]  Malkin, G. and R. Minnear, "RIPng for IPv6", RFC 2080,
+              January 1997.
+
+   [RFC2119]  Bradner, S., "Key words for use in RFCs to Indicate
+              Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+   [RFC2328]  Moy, J., "OSPF Version 2", STD 54, RFC 2328, April 1998.
+
+   [RFC4271]  Rekhter, Y., Li, T., and S. Hares, "A Border Gateway
+              Protocol 4 (BGP-4)", RFC 4271, January 2006.
+
+   [RFC4760]  Bates, T., Chandra, R., Katz, D., and Y. Rekhter,
+              "Multiprotocol Extensions for BGP-4", RFC 4760,
+              January 2007.
+
+   [RFC5226]  Narten, T. and H. Alvestrand, "Guidelines for Writing an
+              IANA Considerations Section in RFCs", BCP 26, RFC 5226,
+              May 2008.
+
+   [RFC5340]  Coltun, R., Ferguson, D., Moy, J., and A. Lindem, "OSPF
+              for IPv6", RFC 5340, July 2008.
+
+8.2.  Informative References
+
+   [MRT PROG GUIDE]
+              Labovitz, C., "MRT Programmer's Guide", November 1999,
+              <http://www.merit.edu/networkresearch/mrtprogrammer.pdf>.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 22]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+Appendix A.  Deprecated MRT types
+
+   This Appendix lists deprecated MRT types.  These types are documented
+   for informational purposes only.  While documented in some
+   references, they are not known to have been generally implemented.
+
+A.1.  Deprecated MRT Informational Types
+
+   The deprecated MRT Informational Types are defined below:
+
+       0    NULL
+       2    DIE
+       4    PEER_DOWN
+
+A.1.1.  NULL Type
+
+   The NULL Type message causes no operation.
+
+A.1.2.  DIE Type
+
+   The DIE Type signals a remote MRT repository it should stop accepting
+   messages.
+
+A.1.3.  PEER_DOWN Type
+
+   The PEER_DOWN message was intended to indicate that a collector had
+   lost association with a BGP peer.  However, the MRT format provides
+   BGP state change message types which duplicate this functionality.
+
+A.2.  Deprecated MRT Routing Information Types
+
+       5    BGP
+       6    RIP
+       7    IDRP
+       8    RIPNG
+       9    BGP4PLUS
+       10   BGP4PLUS_01
+
+A.2.1.  BGP Type
+
+   The BGP Type indicates the Message field contains BGP routing
+   information.  The BGP routing protocol is defined in RFC 4271
+   [RFC4271].  The information in the message is dependent on the
+   Subtype value.  The BGP Type and all associated Subtypes below are
+   considered to be deprecated by the BGP4MP Type.
+
+   The following BGP Subtypes are defined for the MRT BGP Type.  As with
+   the BGP Type itself, they are all considered to be deprecated.
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 23]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+       0    BGP_NULL
+       1    BGP_UPDATE
+       2    BGP_PREF_UPDATE
+       3    BGP_STATE_CHANGE
+       4    BGP_SYNC
+       5    BGP_OPEN
+       6    BGP_NOTIFY
+       7    BGP_KEEPALIVE
+
+A.2.1.1.  BGP_NULL Subtype
+
+   The BGP_NULL Subtype is a reserved Subtype.
+
+A.2.1.2.  BGP_UPDATE Subtype
+
+   The BGP_UPDATE Subtype is used to encode BGP UPDATE messages.  The
+   format of the MRT Message field for this Subtype is as follows:
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |         Peer AS number        |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         Peer IP address                       |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |        Local AS number        |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                        Local IP address                       |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                    BGP UPDATE Contents (variable)
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   The BGP UPDATE Contents include the entire BGP UPDATE message which
+   follows the BGP Message Header.  The BGP Message Header itself is not
+   included.  The Peer AS number and IP address fields contain the AS
+   number and IP address of the remote system which is generating the
+   BGP UPDATE messages.  The Local AS number and IP address fields
+   contain the AS number and IP address of the local collector system
+   which is archiving the messages.
+
+A.2.1.3.  BGP_PREF_UPDATE Subtype
+
+   The BGP_PREF_UPDATE Subtype is not defined.
+
+A.2.1.4.  BGP_STATE_CHANGE Subtype
+
+   The BGP_STATE_CHANGE Subtype is used to record changes in the BGP
+   finite state machine.  These FSM states are defined in RFC 4271
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 24]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+   [RFC4271], Section 8.2.2.  Both the old state value and the new state
+   value are encoded as 2-octet numbers.  The state values are defined
+   numerically as follows:
+
+       1    Idle
+       2    Connect
+       3    Active
+       4    OpenSent
+       5    OpenConfirm
+       6    Established
+
+   The format of the MRT Message field is as follows:
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |         Peer AS number        |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                        Peer IP address                        |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |            Old State          |          New State            |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+A.2.1.5.  BGP_SYNC Subtype
+
+   The BGP_SYNC Subtype was intended to convey a system file name where
+   BGP Table Dump messages should be recorded.  The View # was to
+   correspond to the View # provided in the TABLE_DUMP Type messages.
+   There are no known implementations of this subtype and it SHOULD be
+   ignored.  The following format applies to this Subtype:
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |        View #                 |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |            File Name... (variable)
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   The File Name is terminated with a NULL (0) character.
+
+A.2.1.6.  BGP_OPEN Subtype
+
+   The BGP_OPEN Subtype is used to encode BGP OPEN messages.  The format
+   of the MRT Message field for this Subtype is the same as the
+   BGP_UPDATE, however, the last field contains the contents of the BGP
+   OPEN message.
+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 25]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+A.2.1.7.  BGP_NOTIFY Subtype
+
+   The BGP_NOTIFY Subtype is used to encode BGP NOTIFICATION messages.
+   The format of the MRT Message field for this Subtype is the same as
+   the BGP_UPDATE, however, the last field contains the contents of the
+   BGP NOTIFICATION message.
+
+A.2.1.8.  BGP_KEEPALIVE Subtype
+
+   The BGP_KEEPALIVE Subtype is used to encode BGP KEEPALIVE messages.
+   The format of the MRT Message field for this Subtype is the same as
+   the BGP_UPDATE, however, the last field contains no information.
+
+A.2.2.  RIP Type
+
+   The RIP Type is used to export RIP protocol packets as defined in RFC
+   1058 [RFC1058].  The Subtype field is currently reserved for this
+   Type and SHOULD be set to 0.
+
+   The format of the MRT Message field for the RIP Type is as follows:
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         Peer IP address                       |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         Local IP address                      |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                    RIP Message Contents (variable)
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+A.2.3.  IDRP Type
+
+   The IDRP Type is used to export Inter-Domain-Routing Protocol (IDRP)
+   protocol information as defined in the ISO/IEC 10747 standard.  The
+   Subtype field is unused.  This Type is deprecated due to lack of
+   deployment of IDRP.
+
+A.2.4.  RIPNG Type
+
+   The RIPNG Type is used to export RIPNG protocol packets as defined in
+   RFC 2080 [RFC2080].  The RIPNG protocol updates the RIP protocol to
+   support IPv6.  The Subtype field is currently reserved for this Type
+   and SHOULD be set to 0.
+
+   The format of the MRT Message field for the RIPNG Type is as follows:
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 26]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                                                               |
+       ~                        Peer IPv6 address                      ~
+       |                                                               |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                                                               |
+       ~                        Local IPv6 address                     ~
+       |                                                               |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                  RIPNG Message Contents (variable)
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+A.2.5.  BGP4PLUS and BGP4PLUS_01 Types
+
+   The BGP4PLUS and BGP4PLUS_01 Types were defined to support IPv6 BGP
+   routing information.  The BGP4PLUS Type was specified based on the
+   initial Internet Draft for Multiprotocol Extensions to BGP-4.  The
+   BGP4PLUS_01 Type was specified to correspond to the -01 revision of
+   this Internet Draft.  The two Types share the same definitions in
+   terms of their MRT format specifications.
+
+   The Subtype field definitions are shared with the BGP Type, however,
+   the address fields in the BGP_UPDATE, BGP_OPEN, BGP_NOTIFY,
+   BGP_KEEPALIVE, and BGP_STATE_CHANGE Subtype messages are extended to
+   16 octets for IPv6 addresses.  As with the BGP Type, the BGP4PLUS and
+   BGP4PLUS_01 Types are deprecated as they are superseded by the BGP4MP
+   Type.
+
+A.2.6.  Deprecated BGP4MP Subtypes
+
+   The following two subtypes of the BGP4MP Type are considered to be
+   deprecated.
+
+       2    BGP4MP_ENTRY
+       3    BGP4MP_SNAPSHOT
+
+A.2.6.1.  BGP4MP_ENTRY Subtype
+
+   This Subtype is similar to the TABLE_DUMP Type and is used to record
+   RIB table entries.  It extends the TABLE_DUMP Type to include true
+   multiprotocol support.  However, this Type does not support 4-Byte AS
+   numbers and has not been widely implemented.  This Type is deprecated
+   in favor of the TABLE_DUMP_V2 which includes 4-Byte AS number support
+   and a more compact format.
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 27]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |         Peer AS number        |        Local AS number        |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |        Interface Index        |        Address Family         |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      Peer IP address (variable)               |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      Local IP address (variable)              |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |           View #              |             Status            |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                        Time last change                       |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |        Address Family         |    SAFI       | Next-Hop-Len  |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                     Next Hop Address (variable)               |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       | Prefix Length  |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                     Address Prefix (variable)                 |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |       Attribute Length        |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                    BGP Attribute... (variable)
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+A.2.6.2.  BGP4MP_SNAPSHOT Subtype
+
+   This Subtype was intended to convey a system file name where
+   BGP4MP_ENTRY messages should be recorded.  It is similar to the
+   BGP_SYNC message Subtype and is deprecated.
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |        View #                 |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |            File Name... (variable)
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+
+
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 28]
+
+Internet-Draft                 MRT Format                     March 2010
+
+
+Authors' Addresses
+
+   Larry Blunk
+   Merit Network
+
+   Email: ljb@merit.edu
+
+
+   Manish Karir
+   Merit Network
+
+   Email: mkarir@merit.edu
+
+
+   Craig Labovitz
+   Arbor Networks
+
+   Email: labovit@arbor.net
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Blunk, et al.           Expires September 9, 2010              [Page 29]
+
+
diff --git a/doc/rfc1771.txt b/doc/rfc1771.txt
@@ -0,0 +1,3195 @@
+
+
+
+
+
+
+Network Working Group                                         Y. Rekhter
+Request for Comments: 1771        T.J. Watson Research Center, IBM Corp.
+Obsoletes: 1654                                                    T. Li
+Category: Standards Track                                  cisco Systems
+                                                                 Editors
+                                                              March 1995
+
+
+                  A Border Gateway Protocol 4 (BGP-4)
+
+Status of this Memo
+
+   This document specifies an Internet standards track protocol for the
+   Internet community, and requests discussion and suggestions for
+   improvements.  Please refer to the current edition of the "Internet
+   Official Protocol Standards" (STD 1) for the standardization state
+   and status of this protocol.  Distribution of this memo is unlimited.
+
+Abstract
+
+   This document, together with its companion document, "Application of
+   the Border Gateway Protocol in the Internet", define an inter-
+   autonomous system routing protocol for the Internet.
+
+1. Acknowledgements
+
+   This document was originally published as RFC 1267 in October 1991,
+   jointly authored by Kirk Lougheed (cisco Systems) and Yakov Rekhter
+   (IBM).
+
+   We would like to express our thanks to Guy Almes (ANS), Len Bosack
+   (cisco Systems), and Jeffrey C. Honig (Cornell University) for their
+   contributions to the earlier version of this document.
+
+   We would like to explicitly thank Bob Braden (ISI) for the review of
+   the earlier version of this document as well as his constructive and
+   valuable comments.
+
+   We would also like to thank Bob Hinden, Director for Routing of the
+   Internet Engineering Steering Group, and the team of reviewers he
+   assembled to review the previous version (BGP-2) of this document.
+   This team, consisting of Deborah Estrin, Milo Medin, John Moy, Radia
+   Perlman, Martha Steenstrup, Mike St. Johns, and Paul Tsuchiya, acted
+   with a strong combination of toughness, professionalism, and
+   courtesy.
+
+
+
+
+
+
+Rekhter & Li                                                    [Page 1]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   This updated version of the document is the product of the IETF IDR
+   Working Group with Yakov Rekhter and Tony Li as editors. Certain
+   sections of the document borrowed heavily from IDRP [7], which is the
+   OSI counterpart of BGP. For this credit should be given to the ANSI
+   X3S3.3 group chaired by Lyman Chapin (BBN) and to Charles Kunzinger
+   (IBM Corp.) who was the IDRP editor within that group.  We would also
+   like to thank Mike Craren (Proteon, Inc.), Dimitry Haskin (Bay
+   Networks, Inc.), John Krawczyk (Bay Networks, Inc.), and Paul Traina
+   (cisco Systems) for their insightful comments.
+
+   We would like to specially acknowledge numerous contributions by
+   Dennis Ferguson (MCI).
+
+   The work of Yakov Rekhter was supported in part by the National
+   Science Foundation under Grant Number NCR-9219216.
+
+2.  Introduction
+
+   The Border Gateway Protocol (BGP) is an inter-Autonomous System
+   routing protocol.  It is built on experience gained with EGP as
+   defined in RFC 904 [1] and EGP usage in the NSFNET Backbone as
+   described in RFC 1092 [2] and RFC 1093 [3].
+
+   The primary function of a BGP speaking system is to exchange network
+   reachability information with other BGP systems.  This network
+   reachability information includes information on the list of
+   Autonomous Systems (ASs) that reachability information traverses.
+   This information is sufficient to construct a graph of AS
+   connectivity from which routing loops may be pruned and some policy
+   decisions at the AS level may be enforced.
+
+   BGP-4 provides a new set of mechanisms for supporting classless
+   interdomain routing.  These mechanisms include support for
+   advertising an IP prefix and eliminate the concept of network
+   "class" within BGP.  BGP-4 also introduces mechanisms which allow
+   aggregation of routes, including aggregation of AS paths.  These
+   changes provide support for the proposed supernetting scheme [8, 9].
+
+   To characterize the set of policy decisions that can be enforced
+   using BGP, one must focus on the rule that a BGP speaker advertise to
+   its peers (other BGP speakers which it communicates with) in
+   neighboring ASs only those routes that it itself uses.  This rule
+   reflects the "hop-by-hop" routing paradigm generally used throughout
+   the current Internet.  Note that some policies cannot be supported by
+   the "hop-by-hop" routing paradigm and thus require techniques such as
+   source routing to enforce.  For example, BGP does not enable one AS
+   to send traffic to a neighboring AS intending that the traffic take a
+   different route from that taken by traffic originating in the
+
+
+
+Rekhter & Li                                                    [Page 2]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   neighboring AS.  On the other hand, BGP can support any policy
+   conforming to the "hop-by-hop" routing paradigm.  Since the current
+   Internet uses only the "hop-by-hop" routing paradigm and since BGP
+   can support any policy that conforms to that paradigm, BGP is highly
+   applicable as an inter-AS routing protocol for the current Internet.
+
+   A more complete discussion of what policies can and cannot be
+   enforced with BGP is outside the scope of this document (but refer to
+   the companion document discussing BGP usage [5]).
+
+   BGP runs over a reliable transport protocol.  This eliminates the
+   need to implement explicit update fragmentation, retransmission,
+   acknowledgement, and sequencing.  Any authentication scheme used by
+   the transport protocol may be used in addition to BGP's own
+   authentication mechanisms.  The error notification mechanism used in
+   BGP assumes that the transport protocol supports a "graceful" close,
+   i.e., that all outstanding data will be delivered before the
+   connection is closed.
+
+   BGP uses TCP [4] as its transport protocol.  TCP meets BGP's
+   transport requirements and is present in virtually all commercial
+   routers and hosts.  In the following descriptions the phrase
+   "transport protocol connection" can be understood to refer to a TCP
+   connection.  BGP uses TCP port 179 for establishing its connections.
+
+   This document uses the term `Autonomous System' (AS) throughout.  The
+   classic definition of an Autonomous System is a set of routers under
+   a single technical administration, using an interior gateway protocol
+   and common metrics to route packets within the AS, and using an
+   exterior gateway protocol to route packets to other ASs.  Since this
+   classic definition was developed, it has become common for a single
+   AS to use several interior gateway protocols and sometimes several
+   sets of metrics within an AS.  The use of the term Autonomous System
+   here stresses the fact that, even when multiple IGPs and metrics are
+   used, the administration of an AS appears to other ASs to have a
+   single coherent interior routing plan and presents a consistent
+   picture of what destinations are reachable through it.
+
+   The planned use of BGP in the Internet environment, including such
+   issues as topology, the interaction between BGP and IGPs, and the
+   enforcement of routing policy rules is presented in a companion
+   document [5].  This document is the first of a series of documents
+   planned to explore various aspects of BGP application.  Please send
+   comments to the BGP mailing list (bgp@ans.net).
+
+
+
+
+
+
+
+Rekhter & Li                                                    [Page 3]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+3.  Summary of Operation
+
+   Two systems form a transport protocol connection between one another.
+   They exchange messages to open and confirm the connection parameters.
+   The initial data flow is the entire BGP routing table.  Incremental
+   updates are sent as the routing tables change.  BGP does not require
+   periodic refresh of the entire BGP routing table.  Therefore, a BGP
+   speaker must retain the current version of the entire BGP routing
+   tables of all of its peers for the duration of the connection.
+   KeepAlive messages are sent periodically to ensure the liveness of
+   the connection.  Notification messages are sent in response to errors
+   or special conditions.  If a connection encounters an error
+   condition, a notification message is sent and the connection is
+   closed.
+
+   The hosts executing the Border Gateway Protocol need not be routers.
+   A non-routing host could exchange routing information with routers
+   via EGP or even an interior routing protocol.  That non-routing host
+   could then use BGP to exchange routing information with a border
+   router in another Autonomous System.  The implications and
+   applications of this architecture are for further study.
+
+   If a particular AS has multiple BGP speakers and is providing transit
+   service for other ASs, then care must be taken to ensure a consistent
+   view of routing within the AS.  A consistent view of the interior
+   routes of the AS is provided by the interior routing protocol.  A
+   consistent view of the routes exterior to the AS can be provided by
+   having all BGP speakers within the AS maintain direct BGP connections
+   with each other.  Using a common set of policies, the BGP speakers
+   arrive at an agreement as to which border routers will serve as
+   exit/entry points for particular destinations outside the AS.  This
+   information is communicated to the AS's internal routers, possibly
+   via the interior routing protocol.  Care must be taken to ensure that
+   the interior routers have all been updated with transit information
+   before the BGP speakers announce to other ASs that transit service is
+   being provided.
+
+   Connections between BGP speakers of different ASs are referred to as
+   "external" links.  BGP connections between BGP speakers within the
+   same AS are referred to as "internal" links.  Similarly, a peer in a
+   different AS is referred to as an external peer, while a peer in the
+   same AS may be described as an internal peer.
+
+
+
+
+
+
+
+
+
+Rekhter & Li                                                    [Page 4]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+3.1 Routes: Advertisement and Storage
+
+   For purposes of this protocol a route is defined as a unit of
+   information that pairs a destination with the attributes of a path to
+   that destination:
+
+      - Routes are advertised between a pair of BGP speakers in UPDATE
+      messages:  the destination is the systems whose IP addresses are
+      reported in the Network Layer Reachability Information (NLRI)
+      field, and the path is the information reported in the path
+      attributes fields of the same UPDATE message.
+
+      - Routes are stored in the Routing Information Bases (RIBs):
+      namely, the Adj-RIBs-In, the Loc-RIB, and the Adj-RIBs-Out. Routes
+      that will be advertised to other BGP speakers must be present in
+      the Adj-RIB-Out; routes that will be used by the local BGP speaker
+      must be present in the Loc-RIB, and the next hop for each of these
+      routes must be present in the local BGP speaker's forwarding
+      information base; and routes that are received from other BGP
+      speakers are present in the Adj-RIBs-In.
+
+   If a BGP speaker chooses to advertise the route, it may add to or
+   modify the path attributes of the route before advertising it to a
+   peer.
+
+   BGP provides mechanisms by which a BGP speaker can inform its peer
+   that a previously advertised route is no longer available for use.
+   There are three methods by which a given BGP speaker can indicate
+   that a route has been withdrawn from service:
+
+      a) the IP prefix that expresses destinations for a previously
+      advertised route can be advertised in the WITHDRAWN ROUTES field
+      in the UPDATE message, thus marking the associated route as being
+      no longer available for use
+
+      b) a replacement route with the same Network Layer Reachability
+      Information can be advertised, or
+
+      c) the BGP speaker - BGP speaker connection can be closed, which
+      implicitly removes from service all routes which the pair of
+      speakers had advertised to each other.
+
+
+
+
+
+
+
+
+
+
+Rekhter & Li                                                    [Page 5]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+3.2 Routing Information Bases
+
+   The Routing Information Base (RIB) within a BGP speaker consists of
+   three distinct parts:
+
+      a) Adj-RIBs-In: The Adj-RIBs-In store routing information that has
+      been learned from inbound UPDATE messages. Their contents
+      represent routes that are available as an input to the Decision
+      Process.
+
+      b) Loc-RIB: The Loc-RIB contains the local routing information
+      that the BGP speaker has selected by applying its local policies
+      to the routing information contained in its Adj-RIBs-In.
+
+      c) Adj-RIBs-Out: The Adj-RIBs-Out store the information that the
+      local BGP speaker has selected for advertisement to its peers. The
+      routing information stored in the Adj-RIBs-Out will be carried in
+      the local BGP speaker's UPDATE messages and advertised to its
+      peers.
+
+   In summary, the Adj-RIBs-In contain unprocessed routing information
+   that has been advertised to the local BGP speaker by its peers; the
+   Loc-RIB contains the routes that have been selected by the local BGP
+   speaker's Decision Process; and the Adj-RIBs-Out organize the routes
+   for advertisement to specific peers by means of the local speaker's
+   UPDATE messages.
+
+   Although the conceptual model distinguishes between Adj-RIBs-In,
+   Loc-RIB, and Adj-RIBs-Out, this neither implies nor requires that an
+   implementation must maintain three separate copies of the routing
+   information. The choice of implementation (for example, 3 copies of
+   the information vs 1 copy with pointers) is not constrained by the
+   protocol.
+
+4.  Message Formats
+
+   This section describes message formats used by BGP.
+
+   Messages are sent over a reliable transport protocol connection.  A
+   message is processed only after it is entirely received.  The maximum
+   message size is 4096 octets.  All implementations are required to
+   support this maximum message size.  The smallest message that may be
+   sent consists of a BGP header without a data portion, or 19 octets.
+
+
+
+
+
+
+
+
+Rekhter & Li                                                    [Page 6]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+4.1 Message Header Format
+
+   Each message has a fixed-size header.  There may or may not be a data
+   portion following the header, depending on the message type.  The
+   layout of these fields is shown below:
+
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                                                               |
+      +                                                               +
+      |                                                               |
+      +                                                               +
+      |                           Marker                              |
+      +                                                               +
+      |                                                               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |          Length               |      Type     |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+      Marker:
+
+         This 16-octet field contains a value that the receiver of the
+         message can predict.  If the Type of the message is OPEN, or if
+         the OPEN message carries no Authentication Information (as an
+         Optional Parameter), then the Marker must be all ones.
+         Otherwise, the value of the marker can be predicted by some
+         computation specified as part of the authentication mechanism
+         (which is specified as part of the Authentication Information)
+         used.  The Marker can be used to detect loss of synchronization
+         between a pair of BGP peers, and to authenticate incoming BGP
+         messages.
+
+      Length:
+
+         This 2-octet unsigned integer indicates the total length of the
+         message, including the header, in octets.  Thus, e.g., it
+         allows one to locate in the transport-level stream the (Marker
+         field of the) next message.  The value of the Length field must
+         always be at least 19 and no greater than 4096, and may be
+         further constrained, depending on the message type.  No
+         "padding" of extra data after the message is allowed, so the
+         Length field must have the smallest value required given the
+         rest of the message.
+
+
+
+
+
+
+
+Rekhter & Li                                                    [Page 7]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+      Type:
+
+         This 1-octet unsigned integer indicates the type code of the
+         message.  The following type codes are defined:
+
+                                    1 - OPEN
+                                    2 - UPDATE
+                                    3 - NOTIFICATION
+                                    4 - KEEPALIVE
+
+4.2 OPEN Message Format
+
+   After a transport protocol connection is established, the first
+   message sent by each side is an OPEN message.  If the OPEN message is
+   acceptable, a KEEPALIVE message confirming the OPEN is sent back.
+   Once the OPEN is confirmed, UPDATE, KEEPALIVE, and NOTIFICATION
+   messages may be exchanged.
+
+   In addition to the fixed-size BGP header, the OPEN message contains
+   the following fields:
+
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+
+       |    Version    |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |     My Autonomous System      |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |           Hold Time           |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         BGP Identifier                        |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       | Opt Parm Len  |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                                                               |
+       |                       Optional Parameters                     |
+       |                                                               |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+      Version:
+
+         This 1-octet unsigned integer indicates the protocol version
+         number of the message.  The current BGP version number is 4.
+
+      My Autonomous System:
+
+         This 2-octet unsigned integer indicates the Autonomous System
+         number of the sender.
+
+
+
+Rekhter & Li                                                    [Page 8]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+      Hold Time:
+
+         This 2-octet unsigned integer indicates the number of seconds
+         that the sender proposes for the value of the Hold Timer.  Upon
+         receipt of an OPEN message, a BGP speaker MUST calculate the
+         value of the Hold Timer by using the smaller of its configured
+         Hold Time and the Hold Time received in the OPEN message.  The
+         Hold Time MUST be either zero or at least three seconds.  An
+         implementation may reject connections on the basis of the Hold
+         Time.  The calculated value indicates the maximum number of
+         seconds that may elapse between the receipt of successive
+         KEEPALIVE, and/or UPDATE messages by the sender.
+
+      BGP Identifier:
+
+         This 4-octet unsigned integer indicates the BGP Identifier of
+         the sender. A given BGP speaker sets the value of its BGP
+         Identifier to an IP address assigned to that BGP speaker.  The
+         value of the BGP Identifier is determined on startup and is the
+         same for every local interface and every BGP peer.
+
+      Optional Parameters Length:
+
+         This 1-octet unsigned integer indicates the total length of the
+         Optional Parameters field in octets. If the value of this field
+         is zero, no Optional Parameters are present.
+
+      Optional Parameters:
+
+         This field may contain a list of optional parameters, where
+         each parameter is encoded as a <Parameter Type, Parameter
+         Length, Parameter Value> triplet.
+
+          0                   1
+          0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-...
+         |  Parm. Type   | Parm. Length  |  Parameter Value (variable)
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-...
+
+         Parameter Type is a one octet field that unambiguously
+         identifies individual parameters. Parameter Length is a one
+         octet field that contains the length of the Parameter Value
+         field in octets.  Parameter Value is a variable length field
+         that is interpreted according to the value of the Parameter
+         Type field.
+
+
+
+
+
+
+Rekhter & Li                                                    [Page 9]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+         This document defines the following Optional Parameters:
+
+         a) Authentication Information (Parameter Type 1):
+
+            This optional parameter may be used to authenticate a BGP
+            peer. The Parameter Value field contains a 1-octet
+            Authentication Code followed by a variable length
+            Authentication Data.
+
+                0 1 2 3 4 5 6 7 8
+                +-+-+-+-+-+-+-+-+
+                |  Auth. Code   |
+                +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+                |                                                     |
+                |              Authentication Data                    |
+                |                                                     |
+                +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+               Authentication Code:
+
+                  This 1-octet unsigned integer indicates the
+                  authentication mechanism being used.  Whenever an
+                  authentication mechanism is specified for use within
+                  BGP, three things must be included in the
+                  specification:
+
+                  - the value of the Authentication Code which indicates
+                  use of the mechanism,
+                  - the form and meaning of the Authentication Data, and
+                  - the algorithm for computing values of Marker fields.
+
+                  Note that a separate authentication mechanism may be
+                  used in establishing the transport level connection.
+
+               Authentication Data:
+
+                  The form and meaning of this variable-length field
+                  depend on the Authentication Code.
+
+         The minimum length of the OPEN message is 29 octets (including
+         message header).
+
+
+
+
+
+
+
+
+
+
+Rekhter & Li                                                   [Page 10]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+4.3 UPDATE Message Format
+
+   UPDATE messages are used to transfer routing information between BGP
+   peers.  The information in the UPDATE packet can be used to construct
+   a graph describing the relationships of the various Autonomous
+   Systems.  By applying rules to be discussed, routing information
+   loops and some other anomalies may be detected and removed from
+   inter-AS routing.
+
+   An UPDATE message is used to advertise a single feasible route to a
+   peer, or to withdraw multiple unfeasible routes from service (see
+   3.1). An UPDATE message may simultaneously advertise a feasible route
+   and withdraw multiple unfeasible routes from service.  The UPDATE
+   message always includes the fixed-size BGP header, and can optionally
+   include the other fields as shown below:
+
+      +-----------------------------------------------------+
+      |   Unfeasible Routes Length (2 octets)               |
+      +-----------------------------------------------------+
+      |  Withdrawn Routes (variable)                        |
+      +-----------------------------------------------------+
+      |   Total Path Attribute Length (2 octets)            |
+      +-----------------------------------------------------+
+      |    Path Attributes (variable)                       |
+      +-----------------------------------------------------+
+      |   Network Layer Reachability Information (variable) |
+      +-----------------------------------------------------+
+
+      Unfeasible Routes Length:
+
+         This 2-octet unsigned integer indicates the total length of
+         the Withdrawn Routes field in octets.  Its value must allow the
+         length of the Network Layer Reachability Information field to
+         be determined as specified below.
+
+         A value of 0 indicates that no routes are being withdrawn from
+         service, and that the WITHDRAWN ROUTES field is not present in
+         this UPDATE message.
+
+      Withdrawn Routes:
+
+         This is a variable length field that contains a list of IP
+         address prefixes for the routes that are being withdrawn from
+         service.  Each IP address prefix is encoded as a 2-tuple of the
+         form <length, prefix>, whose fields are described below:
+
+
+
+
+
+
+Rekhter & Li                                                   [Page 11]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+                  +---------------------------+
+                  |   Length (1 octet)        |
+                  +---------------------------+
+                  |   Prefix (variable)       |
+                  +---------------------------+
+
+         The use and the meaning of these fields are as follows:
+
+         a) Length:
+
+            The Length field indicates the length in bits of the IP
+            address prefix. A length of zero indicates a prefix that
+            matches all IP addresses (with prefix, itself, of zero
+            octets).
+
+         b) Prefix:
+
+            The Prefix field contains IP address prefixes followed by
+            enough trailing bits to make the end of the field fall on an
+            octet boundary. Note that the value of trailing bits is
+            irrelevant.
+
+      Total Path Attribute Length:
+
+         This 2-octet unsigned integer indicates the total length of the
+         Path Attributes field in octets.  Its value must allow the
+         length of the Network Layer Reachability field to be determined
+         as specified below.
+
+         A value of 0 indicates that no Network Layer Reachability
+         Information field is present in this UPDATE message.
+
+      Path Attributes:
+
+         A variable length sequence of path attributes is present in
+         every UPDATE.  Each path attribute is a triple <attribute type,
+         attribute length, attribute value> of variable length.
+
+         Attribute Type is a two-octet field that consists of the
+         Attribute Flags octet followed by the Attribute Type Code
+         octet.
+
+                0                   1
+                0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+               +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+               |  Attr. Flags  |Attr. Type Code|
+               +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+
+Rekhter & Li                                                   [Page 12]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+         The high-order bit (bit 0) of the Attribute Flags octet is the
+         Optional bit.  It defines whether the attribute is optional (if
+         set to 1) or well-known (if set to 0).
+
+         The second high-order bit (bit 1) of the Attribute Flags octet
+         is the Transitive bit.  It defines whether an optional
+         attribute is transitive (if set to 1) or non-transitive (if set
+         to 0).  For well-known attributes, the Transitive bit must be
+         set to 1.  (See Section 5 for a discussion of transitive
+         attributes.)
+
+         The third high-order bit (bit 2) of the Attribute Flags octet
+         is the Partial bit.  It defines whether the information
+         contained in the optional transitive attribute is partial (if
+         set to 1) or complete (if set to 0).  For well-known attributes
+         and for optional non-transitive attributes the Partial bit must
+         be set to 0.
+
+         The fourth high-order bit (bit 3) of the Attribute Flags octet
+         is the Extended Length bit.  It defines whether the Attribute
+         Length is one octet (if set to 0) or two octets (if set to 1).
+         Extended Length may be used only if the length of the attribute
+         value is greater than 255 octets.
+
+         The lower-order four bits of the Attribute Flags octet are
+         unused. They must be zero (and must be ignored when received).
+
+         The Attribute Type Code octet contains the Attribute Type Code.
+         Currently defined Attribute Type Codes are discussed in Section
+         5.
+
+         If the Extended Length bit of the Attribute Flags octet is set
+         to 0, the third octet of the Path Attribute contains the length
+         of the attribute data in octets.
+
+         If the Extended Length bit of the Attribute Flags octet is set
+         to 1, then the third and the fourth octets of the path
+         attribute contain the length of the attribute data in octets.
+
+         The remaining octets of the Path Attribute represent the
+         attribute value and are interpreted according to the Attribute
+         Flags and the Attribute Type Code. The supported Attribute Type
+         Codes, their attribute values and uses are the following:
+
+
+
+
+
+
+
+
+Rekhter & Li                                                   [Page 13]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+         a)   ORIGIN (Type Code 1):
+
+            ORIGIN is a well-known mandatory attribute that defines the
+            origin of the path information.   The data octet can assume
+            the following values:
+
+                  Value      Meaning
+
+                  0         IGP - Network Layer Reachability Information
+                               is interior to the originating AS
+
+                  1         EGP - Network Layer Reachability Information
+                               learned via EGP
+
+                  2         INCOMPLETE - Network Layer Reachability
+                               Information learned by some other means
+
+            Its usage is defined in 5.1.1
+
+         b) AS_PATH (Type Code 2):
+
+            AS_PATH is a well-known mandatory attribute that is composed
+            of a sequence of AS path segments. Each AS path segment is
+            represented by a triple <path segment type, path segment
+            length, path segment value>.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Rekhter & Li                                                   [Page 14]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+            The path segment type is a 1-octet long field with the
+            following values defined:
+
+                  Value      Segment Type
+
+                  1         AS_SET: unordered set of ASs a route in the
+                               UPDATE message has traversed
+
+                  2         AS_SEQUENCE: ordered set of ASs a route in
+                               the UPDATE message has traversed
+
+            The path segment length is a 1-octet long field containing
+            the number of ASs in the path segment value field.
+
+            The path segment value field contains one or more AS
+            numbers, each encoded as a 2-octet long field.
+
+            Usage of this attribute is defined in 5.1.2.
+
+         c)   NEXT_HOP (Type Code 3):
+
+            This is a well-known mandatory attribute that defines the IP
+            address of the border router that should be used as the next
+            hop to the destinations listed in the Network Layer
+            Reachability field of the UPDATE message.
+
+            Usage of this attribute is defined in 5.1.3.
+
+         d) MULTI_EXIT_DISC (Type Code 4):
+
+            This is an optional non-transitive attribute that is a four
+            octet non-negative integer. The value of this attribute may
+            be used by a BGP speaker's decision process to discriminate
+            among multiple exit points to a neighboring autonomous
+            system.
+
+            Its usage is defined in 5.1.4.
+
+         e) LOCAL_PREF (Type Code 5):
+
+            LOCAL_PREF is a well-known discretionary attribute that is a
+            four octet non-negative integer. It is used by a BGP speaker
+            to inform other BGP speakers in its own autonomous system of
+            the originating speaker's degree of preference for an
+            advertised route. Usage of this attribute is described in
+            5.1.5.
+
+
+
+
+
+Rekhter & Li                                                   [Page 15]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+         f) ATOMIC_AGGREGATE (Type Code 6)
+
+            ATOMIC_AGGREGATE is a well-known discretionary attribute of
+            length 0. It is used by a BGP speaker to inform other BGP
+            speakers that the local system selected a less specific
+            route without selecting a more specific route which is
+            included in it. Usage of this attribute is described in
+            5.1.6.
+
+         g) AGGREGATOR (Type Code 7)
+
+            AGGREGATOR is an optional transitive attribute of length 6.
+            The attribute contains the last AS number that formed the
+            aggregate route (encoded as 2 octets), followed by the IP
+            address of the BGP speaker that formed the aggregate route
+            (encoded as 4 octets).  Usage of this attribute is described
+            in 5.1.7
+
+      Network Layer Reachability Information:
+
+         This variable length field contains a list of IP address
+         prefixes.  The length in octets of the Network Layer
+         Reachability Information is not encoded explicitly, but can be
+         calculated as:
+
+            UPDATE message Length - 23 - Total Path Attributes Length -
+            Unfeasible Routes Length
+
+         where UPDATE message Length is the value encoded in the fixed-
+         size BGP header, Total Path Attribute Length and Unfeasible
+         Routes Length  are the values encoded in the variable part of
+         the UPDATE message, and 23 is a combined length of the fixed-
+         size BGP header, the Total Path Attribute Length field and the
+         Unfeasible Routes Length field.
+
+         Reachability information is encoded as one or more 2-tuples of
+         the form <length, prefix>, whose fields are described below:
+
+                  +---------------------------+
+                  |   Length (1 octet)        |
+                  +---------------------------+
+                  |   Prefix (variable)       |
+                  +---------------------------+
+
+
+
+
+
+
+
+
+Rekhter & Li                                                   [Page 16]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+         The use and the meaning of these fields are as follows:
+
+         a) Length:
+
+            The Length field indicates the length in bits of the IP
+            address prefix. A length of zero indicates a prefix that
+            matches all IP addresses (with prefix, itself, of zero
+            octets).
+
+         b) Prefix:
+
+            The Prefix field contains IP address prefixes followed by
+            enough trailing bits to make the end of the field fall on an
+            octet boundary. Note that the value of the trailing bits is
+            irrelevant.
+
+   The minimum length of the UPDATE message is 23 octets -- 19 octets
+   for the fixed header + 2 octets for the Unfeasible Routes Length + 2
+   octets for the Total Path Attribute Length (the value of Unfeasible
+   Routes Length is 0  and the value of Total Path Attribute Length is
+   0).
+
+   An UPDATE message can advertise at most one route, which may be
+   described by several path attributes. All path attributes contained
+   in a given UPDATE message apply to the destinations carried in the
+   Network Layer Reachability Information field of the UPDATE message.
+
+   An UPDATE message can list multiple routes to be withdrawn from
+   service.  Each such route is identified by its destination (expressed
+   as an IP prefix), which unambiguously identifies the route in the
+   context of the BGP speaker - BGP speaker connection to which it has
+   been previously advertised.
+
+   An UPDATE message may advertise only routes to be withdrawn from
+   service, in which case it will not include path attributes or Network
+   Layer Reachability Information. Conversely, it may advertise only a
+   feasible route, in which case the WITHDRAWN ROUTES field need not be
+   present.
+
+4.4 KEEPALIVE Message Format
+
+   BGP does not use any transport protocol-based keep-alive mechanism to
+   determine if peers are reachable.  Instead, KEEPALIVE messages are
+   exchanged between peers often enough as not to cause the Hold Timer
+   to expire.  A reasonable maximum time between KEEPALIVE messages
+   would be one third of the Hold Time interval.  KEEPALIVE messages
+   MUST NOT be sent more frequently than one per second.  An
+   implementation MAY adjust the rate at which it sends KEEPALIVE
+
+
+
+Rekhter & Li                                                   [Page 17]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   messages as a function of the Hold Time interval.
+
+   If the negotiated Hold Time interval is zero, then periodic KEEPALIVE
+   messages MUST NOT be sent.
+
+   A KEEPALIVE message consists of only the message header and has a
+   length of 19 octets.
+
+4.5 NOTIFICATION Message Format
+
+   A NOTIFICATION message is sent when an error condition is detected.
+   The BGP connection is closed immediately after sending it.
+
+   In addition to the fixed-size BGP header, the NOTIFICATION message
+   contains the following fields:
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       | Error code    | Error subcode |           Data                |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+                               +
+       |                                                               |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+      Error Code:
+
+         This 1-octet unsigned integer indicates the type of
+         NOTIFICATION.  The following Error Codes have been defined:
+
+            Error Code       Symbolic Name               Reference
+
+              1         Message Header Error             Section 6.1
+
+              2         OPEN Message Error               Section 6.2
+
+              3         UPDATE Message Error             Section 6.3
+
+              4         Hold Timer Expired               Section 6.5
+
+              5         Finite State Machine Error       Section 6.6
+
+              6         Cease                            Section 6.7
+
+      Error subcode:
+
+         This 1-octet unsigned integer provides more specific
+         information about the nature of the reported error.  Each Error
+         Code may have one or more Error Subcodes associated with it.
+
+
+
+Rekhter & Li                                                   [Page 18]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+         If no appropriate Error Subcode is defined, then a zero
+         (Unspecific) value is used for the Error Subcode field.
+
+         Message Header Error subcodes:
+
+                               1  - Connection Not Synchronized.
+                               2  - Bad Message Length.
+                               3  - Bad Message Type.
+
+         OPEN Message Error subcodes:
+
+                               1  - Unsupported Version Number.
+                               2  - Bad Peer AS.
+                               3  - Bad BGP Identifier.
+                               4  - Unsupported Optional Parameter.
+                               5  - Authentication Failure.
+                               6  - Unacceptable Hold Time.
+
+         UPDATE Message Error subcodes:
+
+                               1 - Malformed Attribute List.
+                               2 - Unrecognized Well-known Attribute.
+                               3 - Missing Well-known Attribute.
+                               4 - Attribute Flags Error.
+                               5 - Attribute Length Error.
+                               6 - Invalid ORIGIN Attribute.
+                               7 - AS Routing Loop.
+                               8 - Invalid NEXT_HOP Attribute.
+                               9 - Optional Attribute Error.
+                              10 - Invalid Network Field.
+                              11 - Malformed AS_PATH.
+
+      Data:
+
+         This variable-length field is used to diagnose the reason for
+         the NOTIFICATION.  The contents of the Data field depend upon
+         the Error Code and Error Subcode.  See Section 6 below for more
+         details.
+
+         Note that the length of the Data field can be determined from
+         the message Length field by the formula:
+
+                  Message Length = 21 + Data Length
+
+   The minimum length of the NOTIFICATION message is 21 octets
+   (including message header).
+
+
+
+
+
+Rekhter & Li                                                   [Page 19]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+5.  Path Attributes
+
+   This section discusses the path attributes of the UPDATE message.
+
+   Path attributes fall into four separate categories:
+
+               1. Well-known mandatory.
+               2. Well-known discretionary.
+               3. Optional transitive.
+               4. Optional non-transitive.
+
+   Well-known attributes must be recognized by all BGP implementations.
+   Some of these attributes are mandatory and must be included in every
+   UPDATE message.  Others are discretionary and may or may not be sent
+   in a particular UPDATE message.
+
+   All well-known attributes must be passed along (after proper
+   updating, if necessary) to other BGP peers.
+
+   In addition to well-known attributes, each path may contain one or
+   more optional attributes.  It is not required or expected that all
+   BGP implementations support all optional attributes.  The handling of
+   an unrecognized optional attribute is determined by the setting of
+   the Transitive bit in the attribute flags octet.  Paths with
+   unrecognized transitive optional attributes should be accepted. If a
+   path with unrecognized transitive optional attribute is accepted and
+   passed along to other BGP peers, then the unrecognized transitive
+   optional attribute of that path must be passed along with the path to
+   other BGP peers with the Partial bit in the Attribute Flags octet set
+   to 1. If a path with recognized transitive optional attribute is
+   accepted and passed along to other BGP peers and the Partial bit in
+   the Attribute Flags octet is set to 1 by some previous AS, it is not
+   set back to 0 by the current AS. Unrecognized non-transitive optional
+   attributes must be quietly ignored and not passed along to other BGP
+   peers.
+
+   New transitive optional attributes may be attached to the path by the
+   originator or by any other AS in the path.  If they are not attached
+   by the originator, the Partial bit in the Attribute Flags octet is
+   set to 1.  The rules for attaching new non-transitive optional
+   attributes will depend on the nature of the specific attribute.  The
+   documentation of each new non-transitive optional attribute will be
+   expected to include such rules.  (The description of the
+   MULTI_EXIT_DISC attribute gives an example.)  All optional attributes
+   (both transitive and non-transitive) may be updated (if appropriate)
+   by ASs in the path.
+
+
+
+
+
+Rekhter & Li                                                   [Page 20]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   The sender of an UPDATE message should order path attributes within
+   the UPDATE message in ascending order of attribute type.  The
+   receiver of an UPDATE message must be prepared to handle path
+   attributes within the UPDATE message that are out of order.
+
+   The same attribute cannot appear more than once within the Path
+   Attributes field of a particular UPDATE message.
+
+5.1 Path Attribute Usage
+
+   The usage of each BGP path attribute is described in the following
+   clauses.
+
+5.1.1 ORIGIN
+
+   ORIGIN is a well-known mandatory attribute.  The ORIGIN attribute
+   shall be generated by the autonomous system that originates the
+   associated routing information. It shall be included in the UPDATE
+   messages of all BGP speakers that choose to propagate this
+   information to other BGP speakers.
+
+5.1.2   AS_PATH
+
+   AS_PATH is a well-known mandatory attribute. This attribute
+   identifies the autonomous systems through which routing information
+   carried in this UPDATE message has passed. The components of this
+   list can be AS_SETs or AS_SEQUENCEs.
+
+   When a BGP speaker propagates a route which it has learned from
+   another BGP speaker's UPDATE message, it shall modify the route's
+   AS_PATH attribute based on the location of the BGP speaker to which
+   the route will be sent:
+
+      a) When a given BGP speaker advertises the route to another BGP
+      speaker located in its own autonomous system, the advertising
+      speaker shall not modify the AS_PATH attribute associated with the
+      route.
+
+      b) When a given BGP speaker advertises the route to a BGP speaker
+      located in a neighboring autonomous system, then the advertising
+      speaker shall update the AS_PATH attribute as follows:
+
+         1) if the first path segment of the AS_PATH is of type
+         AS_SEQUENCE, the local system shall prepend its own AS number
+         as the last element of the sequence (put it in the leftmost
+         position).
+
+
+
+
+
+Rekhter & Li                                                   [Page 21]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+         2) if the first path segment of the AS_PATH is of type AS_SET,
+         the local system shall prepend a new path segment of type
+         AS_SEQUENCE to the AS_PATH, including its own AS number in that
+         segment.
+
+      When a BGP speaker originates a route then:
+
+         a) the originating speaker shall include its own AS number in
+         the AS_PATH attribute of all UPDATE messages sent to BGP
+         speakers located in neighboring autonomous systems. (In this
+         case, the AS number of the originating speaker's autonomous
+         system will be the only entry in the AS_PATH attribute).
+
+         b) the originating speaker shall include an empty AS_PATH
+         attribute in all UPDATE messages sent to BGP speakers located
+         in its own autonomous system. (An empty AS_PATH attribute is
+         one whose length field contains the value zero).
+
+5.1.3 NEXT_HOP
+
+   The NEXT_HOP path attribute defines the IP address of the border
+   router that should be used as the next hop to the destinations listed
+   in the UPDATE message.  If a border router belongs to the same AS as
+   its peer, then the peer is an internal border router. Otherwise, it
+   is an external border router.  A BGP speaker can advertise any
+   internal border router as the next hop provided that the interface
+   associated with the IP address of this border router (as specified in
+   the NEXT_HOP path attribute) shares a common subnet with both the
+   local and remote BGP speakers. A BGP speaker can advertise any
+   external border router as the next hop, provided that the IP address
+   of this border router was learned from one of the BGP speaker's
+   peers, and the interface associated with the IP address of this
+   border router (as specified in the NEXT_HOP path attribute) shares a
+   common subnet with the local and remote BGP speakers.  A BGP speaker
+   needs to be able to support disabling advertisement of external
+   border routers.
+
+   A BGP speaker must never advertise an address of a peer to that peer
+   as a NEXT_HOP, for a route that the speaker is originating.  A BGP
+   speaker must never install a route with itself as the next hop.
+
+   When a BGP speaker advertises the route to a BGP speaker located in
+   its own autonomous system, the advertising speaker shall not modify
+   the NEXT_HOP attribute associated with the route.  When a BGP speaker
+   receives the route via an internal link, it may forward packets to
+   the NEXT_HOP address if the address contained in the attribute is on
+   a common subnet with the local and remote BGP speakers.
+
+
+
+
+Rekhter & Li                                                   [Page 22]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+5.1.4   MULTI_EXIT_DISC
+
+   The MULTI_EXIT_DISC attribute may be used on external (inter-AS)
+   links to discriminate among multiple exit or entry points to the same
+   neighboring AS.  The value of the MULTI_EXIT_DISC attribute is a four
+   octet unsigned number which is called a metric.  All other factors
+   being equal, the exit or entry point with lower metric should be
+   preferred.  If received over external links, the MULTI_EXIT_DISC
+   attribute may be propagated over internal links to other BGP speakers
+   within the same AS.  The MULTI_EXIT_DISC attribute is never
+   propagated to other BGP speakers in neighboring AS's.
+
+5.1.5   LOCAL_PREF
+
+   LOCAL_PREF is a well-known discretionary attribute that shall be
+   included in all UPDATE messages that a given BGP speaker sends to the
+   other BGP speakers located in its own autonomous system. A BGP
+   speaker shall calculate the degree of preference for each external
+   route and include the degree of preference when advertising a route
+   to its internal peers. The higher degree of preference should be
+   preferred. A BGP speaker shall use the degree of preference learned
+   via LOCAL_PREF in its decision process (see section 9.1.1).
+
+   A BGP speaker shall not include this attribute in UPDATE messages
+   that it sends to BGP speakers located in a neighboring autonomous
+   system. If it is contained in an UPDATE message that is received from
+   a BGP speaker which is not located in the same autonomous system as
+   the receiving speaker, then this attribute shall be ignored by the
+   receiving speaker.
+
+5.1.6   ATOMIC_AGGREGATE
+
+   ATOMIC_AGGREGATE is a well-known discretionary attribute.  If a BGP
+   speaker, when presented with a set of overlapping routes from one of
+   its peers (see 9.1.4), selects the less specific route without
+   selecting the more specific one, then the local system shall attach
+   the ATOMIC_AGGREGATE attribute to the route when propagating it to
+   other BGP speakers (if that attribute is not already present in the
+   received less specific route). A BGP speaker that receives a route
+   with the ATOMIC_AGGREGATE attribute shall not remove the attribute
+   from the route when propagating it to other speakers. A BGP speaker
+   that receives a route with the ATOMIC_AGGREGATE attribute shall not
+   make any NLRI of that route more specific (as defined in 9.1.4) when
+   advertising this route to other BGP speakers.  A BGP speaker that
+   receives a route with the ATOMIC_AGGREGATE attribute needs to be
+   cognizant of the fact that the actual path to destinations, as
+   specified in the NLRI of the route, while having the loop-free
+   property, may traverse ASs that are not listed in the AS_PATH
+
+
+
+Rekhter & Li                                                   [Page 23]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   attribute.
+
+5.1.7   AGGREGATOR
+
+   AGGREGATOR is an optional transitive attribute which may be included
+   in updates which are formed by aggregation (see Section 9.2.4.2).  A
+   BGP speaker which performs route aggregation may add the AGGREGATOR
+   attribute which shall contain its own AS number and IP address.
+
+6.  BGP Error Handling.
+
+   This section describes actions to be taken when errors are detected
+   while processing BGP messages.
+
+   When any of the conditions described here are detected, a
+   NOTIFICATION message with the indicated Error Code, Error Subcode,
+   and Data fields is sent, and the BGP connection is closed.  If no
+   Error Subcode is specified, then a zero must be used.
+
+   The phrase "the BGP connection is closed" means that the transport
+   protocol connection has been closed and that all resources for that
+   BGP connection have been deallocated.  Routing table entries
+   associated with the remote peer are marked as invalid.  The fact that
+   the routes have become invalid is passed to other BGP peers before
+   the routes are deleted from the system.
+
+   Unless specified explicitly, the Data field of the NOTIFICATION
+   message that is sent to indicate an error is empty.
+
+6.1 Message Header error handling.
+
+   All errors detected while processing the Message Header are indicated
+   by sending the NOTIFICATION message with Error Code Message Header
+   Error.  The Error Subcode elaborates on the specific nature of the
+   error.
+
+   The expected value of the Marker field of the message header is all
+   ones if the message type is OPEN.  The expected value of the Marker
+   field for all other types of BGP messages is determined based on the
+   presence of the Authentication Information Optional Parameter in the
+   BGP OPEN message and the actual authentication mechanism (if the
+   Authentication Information in the BGP OPEN message is present). If
+   the Marker field of the message header is not the expected one, then
+   a synchronization error has occurred and the Error Subcode is set to
+   Connection Not Synchronized.
+
+
+
+
+
+
+Rekhter & Li                                                   [Page 24]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   If the Length field of the message header is less than 19 or greater
+   than 4096, or if the Length field of an OPEN message is less than
+   the minimum length of the OPEN message, or if the Length field of an
+   UPDATE message is less than the minimum length of the UPDATE message,
+   or if the Length field of a KEEPALIVE message is not equal to 19, or
+   if the Length field of a NOTIFICATION message is less than the
+   minimum length of the NOTIFICATION message, then the Error Subcode is
+   set to Bad Message Length.  The Data field contains the erroneous
+   Length field.
+
+   If the Type field of the message header is not recognized, then the
+   Error Subcode is set to Bad Message Type.  The Data field contains
+   the erroneous Type field.
+
+6.2 OPEN message error handling.
+
+   All errors detected while processing the OPEN message are indicated
+   by sending the NOTIFICATION message with Error Code OPEN Message
+   Error.  The Error Subcode elaborates on the specific nature of the
+   error.
+
+   If the version number contained in the Version field of the received
+   OPEN message is not supported, then the Error Subcode is set to
+   Unsupported Version Number.  The Data field is a 2-octet unsigned
+   integer, which indicates the largest locally supported version number
+   less than the version the remote BGP peer bid (as indicated in the
+   received OPEN message).
+
+   If the Autonomous System field of the OPEN message is unacceptable,
+   then the Error Subcode is set to Bad Peer AS.  The determination of
+   acceptable Autonomous System numbers is outside the scope of this
+   protocol.
+
+   If the Hold Time field of the OPEN message is unacceptable, then the
+   Error Subcode MUST be set to Unacceptable Hold Time.  An
+   implementation MUST reject Hold Time values of one or two seconds.
+   An implementation MAY reject any proposed Hold Time.  An
+   implementation which accepts a Hold Time MUST use the negotiated
+   value for the Hold Time.
+
+   If the BGP Identifier field of the OPEN message is syntactically
+   incorrect, then the Error Subcode is set to Bad BGP Identifier.
+   Syntactic correctness means that the BGP Identifier field represents
+   a valid IP host address.
+
+   If one of the Optional Parameters in the OPEN message is not
+   recognized, then the Error Subcode is set to Unsupported Optional
+   Parameters.
+
+
+
+Rekhter & Li                                                   [Page 25]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   If the OPEN message carries Authentication Information (as an
+   Optional Parameter), then the corresponding authentication procedure
+   is invoked.  If the authentication procedure (based on Authentication
+   Code and Authentication Data) fails, then the Error Subcode is set to
+   Authentication Failure.
+
+6.3 UPDATE message error handling.
+
+   All errors detected while processing the UPDATE message are indicated
+   by sending the NOTIFICATION message with Error Code UPDATE Message
+   Error.  The error subcode elaborates on the specific nature of the
+   error.
+
+   Error checking of an UPDATE message begins by examining the path
+   attributes.  If the Unfeasible Routes Length or Total Attribute
+   Length is too large (i.e., if Unfeasible Routes Length + Total
+   Attribute Length + 23 exceeds the message Length), then the Error
+   Subcode is set to Malformed Attribute List.
+
+   If any recognized attribute has Attribute Flags that conflict with
+   the Attribute Type Code, then the Error Subcode is set to Attribute
+   Flags Error.  The Data field contains the erroneous attribute (type,
+   length and value).
+
+   If any recognized attribute has Attribute Length that conflicts with
+   the expected length (based on the attribute type code), then the
+   Error Subcode is set to Attribute Length Error.  The Data field
+   contains the erroneous attribute (type, length and value).
+
+   If any of the mandatory well-known attributes are not present, then
+   the Error Subcode is set to Missing Well-known Attribute.  The Data
+   field contains the Attribute Type Code of the missing well-known
+   attribute.
+
+   If any of the mandatory well-known attributes are not recognized,
+   then the Error Subcode is set to Unrecognized Well-known Attribute.
+   The Data field contains the unrecognized attribute (type, length and
+   value).
+
+   If the ORIGIN attribute has an undefined value, then the Error
+   Subcode is set to Invalid Origin Attribute.  The Data field contains
+   the unrecognized attribute (type, length and value).
+
+   If the NEXT_HOP attribute field is syntactically incorrect, then the
+   Error Subcode is set to Invalid NEXT_HOP Attribute.  The Data field
+   contains the incorrect attribute (type, length and value).  Syntactic
+   correctness means that the NEXT_HOP attribute represents a valid IP
+   host address.  Semantic correctness applies only to the external BGP
+
+
+
+Rekhter & Li                                                   [Page 26]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   links. It means that the interface associated with the IP address, as
+   specified in the NEXT_HOP attribute, shares a common subnet with the
+   receiving BGP speaker and is not the IP address of the receiving BGP
+   speaker.  If the NEXT_HOP attribute is semantically incorrect, the
+   error should be logged, and the route should be ignored.  In this
+   case, no NOTIFICATION message should be sent.
+
+   The AS_PATH attribute is checked for syntactic correctness.  If the
+   path is syntactically incorrect, then the Error Subcode is set to
+   Malformed AS_PATH.
+
+   If an optional attribute is recognized, then the value of this
+   attribute is checked.  If an error is detected, the attribute is
+   discarded, and the Error Subcode is set to Optional Attribute Error.
+   The Data field contains the attribute (type, length and value).
+
+   If any attribute appears more than once in the UPDATE message, then
+   the Error Subcode is set to Malformed Attribute List.
+
+   The NLRI field in the UPDATE message is checked for syntactic
+   validity.  If the field is syntactically incorrect, then the Error
+   Subcode is set to Invalid Network Field.
+
+6.4 NOTIFICATION message error handling.
+
+   If a peer sends a NOTIFICATION message, and there is an error in that
+   message, there is unfortunately no means of reporting this error via
+   a subsequent NOTIFICATION message.  Any such error, such as an
+   unrecognized Error Code or Error Subcode, should be noticed, logged
+   locally, and brought to the attention of the administration of the
+   peer.  The means to do this, however, lies outside the scope of this
+   document.
+
+6.5 Hold Timer Expired error handling.
+
+   If a system does not receive successive KEEPALIVE and/or UPDATE
+   and/or NOTIFICATION messages within the period specified in the Hold
+   Time field of the OPEN message, then the NOTIFICATION message with
+   Hold Timer Expired Error Code must be sent and the BGP connection
+   closed.
+
+6.6 Finite State Machine error handling.
+
+   Any error detected by the BGP Finite State Machine (e.g., receipt of
+   an unexpected event) is indicated by sending the NOTIFICATION message
+   with Error Code Finite State Machine Error.
+
+
+
+
+
+Rekhter & Li                                                   [Page 27]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+6.7 Cease.
+
+   In absence of any fatal errors (that are indicated in this section),
+   a BGP peer may choose at any given time to close its BGP connection
+   by sending the NOTIFICATION message with Error Code Cease.  However,
+   the Cease NOTIFICATION message must not be used when a fatal error
+   indicated by this section does exist.
+
+6.8 Connection collision detection.
+
+   If a pair of BGP speakers try simultaneously to establish a TCP
+   connection to each other, then two parallel connections between this
+   pair of speakers might well be formed.  We refer to this situation as
+   connection collision.  Clearly, one of these connections must be
+   closed.
+
+   Based on the value of the BGP Identifier a convention is established
+   for detecting which BGP connection is to be preserved when a
+   collision does occur. The convention is to compare the BGP
+   Identifiers of the peers involved in the collision and to retain only
+   the connection initiated by the BGP speaker with the higher-valued
+   BGP Identifier.
+
+   Upon receipt of an OPEN message, the local system must examine all of
+   its connections that are in the OpenConfirm state.  A BGP speaker may
+   also examine connections in an OpenSent state if it knows the BGP
+   Identifier of the peer by means outside of the protocol.  If among
+   these connections there is a connection to a remote BGP speaker whose
+   BGP Identifier equals the one in the OPEN message, then the local
+   system performs the following collision resolution procedure:
+
+      1. The BGP Identifier of the local system is compared to the BGP
+      Identifier of the remote system (as specified in the OPEN
+      message).
+
+      2. If the value of the local BGP Identifier is less than the
+      remote one, the local system closes BGP connection that already
+      exists (the one that is already in the OpenConfirm state), and
+      accepts BGP connection initiated by the remote system.
+
+      3. Otherwise, the local system closes newly created BGP connection
+      (the one associated with the newly received OPEN message), and
+      continues to use the existing one (the one that is already in the
+      OpenConfirm state).
+
+      Comparing BGP Identifiers is done by treating them as (4-octet
+      long) unsigned integers.
+
+
+
+
+Rekhter & Li                                                   [Page 28]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+      A connection collision with an existing BGP connection that is in
+      Established state causes unconditional closing of the newly
+      created connection. Note that a connection collision cannot be
+      detected with connections that are in Idle, or Connect, or Active
+      states.
+
+      Closing the BGP connection (that results from the collision
+      resolution procedure) is accomplished by sending the NOTIFICATION
+      message with the Error Code Cease.
+
+7.  BGP Version Negotiation.
+
+   BGP speakers may negotiate the version of the protocol by making
+   multiple attempts to open a BGP connection, starting with the highest
+   version number each supports.  If an open attempt fails with an Error
+   Code OPEN Message Error, and an Error Subcode Unsupported Version
+   Number, then the BGP speaker has available the version number it
+   tried, the version number its peer tried, the version number passed
+   by its peer in the NOTIFICATION message, and the version numbers that
+   it supports.  If the two peers do support one or more common
+   versions, then this will allow them to rapidly determine the highest
+   common version. In order to support BGP version negotiation, future
+   versions of BGP must retain the format of the OPEN and NOTIFICATION
+   messages.
+
+8.  BGP Finite State machine.
+
+   This section specifies BGP operation in terms of a Finite State
+   Machine (FSM).  Following is a brief summary and overview of BGP
+   operations by state as determined by this FSM.  A condensed version
+   of the BGP FSM is found in Appendix 1.
+
+      Initially BGP is in the Idle state.
+
+      Idle state:
+
+         In this state BGP refuses all incoming BGP connections.  No
+         resources are allocated to the peer.  In response to the Start
+         event (initiated by either system or operator) the local system
+         initializes all BGP resources, starts the ConnectRetry timer,
+         initiates a transport connection to other BGP peer, while
+         listening for connection that may be initiated by the remote
+         BGP peer, and changes its state to Connect.  The exact value of
+         the ConnectRetry timer is a local matter, but should be
+         sufficiently large to allow TCP initialization.
+
+         If a BGP speaker detects an error, it shuts down the connection
+         and changes its state to Idle. Getting out of the Idle state
+
+
+
+Rekhter & Li                                                   [Page 29]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+         requires generation of the Start event.  If such an event is
+         generated automatically, then persistent BGP errors may result
+         in persistent flapping of the speaker.  To avoid such a
+         condition it is recommended that Start events should not be
+         generated immediately for a peer that was previously
+         transitioned to Idle due to an error. For a peer that was
+         previously transitioned to Idle due to an error, the time
+         between consecutive generation of Start events, if such events
+         are generated automatically, shall exponentially increase. The
+         value of the initial timer shall be 60 seconds. The time shall
+         be doubled for each consecutive retry.
+
+         Any other event received in the Idle state is ignored.
+
+      Connect state:
+
+         In this state BGP is waiting for the transport protocol
+         connection to be completed.
+
+         If the transport protocol connection succeeds, the local system
+         clears the ConnectRetry timer, completes initialization, sends
+         an OPEN message to its peer, and changes its state to OpenSent.
+
+         If the transport protocol connect fails (e.g., retransmission
+         timeout), the local system restarts the ConnectRetry timer,
+         continues to listen for a connection that may be initiated by
+         the remote BGP peer, and changes its state to Active state.
+
+         In response to the ConnectRetry timer expired event, the local
+         system restarts the ConnectRetry timer, initiates a transport
+         connection to other BGP peer, continues to listen for a
+         connection that may be initiated by the remote BGP peer, and
+         stays in the Connect state.
+
+         Start event is ignored in the Connect state.
+
+         In response to any other event (initiated by either system or
+         operator), the local system releases all BGP resources
+         associated with this connection and changes its state to Idle.
+
+      Active state:
+
+         In this state BGP is trying to acquire a peer by initiating a
+         transport protocol connection.
+
+         If the transport protocol connection succeeds, the local system
+         clears the ConnectRetry timer, completes initialization, sends
+         an OPEN message to its peer, sets its Hold Timer to a large
+
+
+
+Rekhter & Li                                                   [Page 30]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+         value, and changes its state to OpenSent.  A Hold Timer value
+         of 4 minutes is suggested.
+
+         In response to the ConnectRetry timer expired event, the local
+         system restarts the ConnectRetry timer, initiates a transport
+         connection to other BGP peer, continues to listen for a
+         connection that may be initiated by the remote BGP peer, and
+         changes its state to Connect.
+
+         If the local system detects that a remote peer is trying to
+         establish BGP connection to it, and the IP address of the
+         remote peer is not an expected one, the local system restarts
+         the ConnectRetry timer, rejects the attempted connection,
+         continues to listen for a connection that may be initiated by
+         the remote BGP peer, and stays in the Active state.
+
+         Start event is ignored in the Active state.
+
+         In response to any other event (initiated by either system or
+         operator), the local system releases all BGP resources
+         associated with this connection and changes its state to Idle.
+
+      OpenSent state:
+
+         In this state BGP waits for an OPEN message from its peer.
+         When an OPEN message is received, all fields are checked for
+         correctness.  If the BGP message header checking or OPEN
+         message checking detects an error (see Section 6.2), or a
+         connection collision (see Section 6.8) the local system sends a
+         NOTIFICATION message and changes its state to Idle.
+
+         If there are no errors in the OPEN message, BGP sends a
+         KEEPALIVE message and sets a KeepAlive timer.  The Hold Timer,
+         which was originally set to a large value (see above), is
+         replaced with the negotiated Hold Time value (see section 4.2).
+         If the negotiated Hold Time value is zero, then the Hold Timer
+         and KeepAlive timer are not started.  If the value of
+         the Autonomous System field is the same as the local Autonomous
+         System number, then the connection is an "internal" connection;
+         otherwise, it is "external".  (This will affect UPDATE
+         processing as described below.)  Finally, the state is changed
+         to OpenConfirm.
+
+         If a disconnect notification is received from the underlying
+         transport protocol, the local system closes the BGP connection,
+         restarts the ConnectRetry timer, continues to listen for a
+         connection that may be initiated by the remote BGP peer, and
+         goes into the Active state.
+
+
+
+Rekhter & Li                                                   [Page 31]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+         If the Hold Timer expires, the local system sends NOTIFICATION
+         message with error code Hold Timer Expired and changes its
+         state to Idle.
+
+         In response to the Stop event (initiated by either system or
+         operator) the local system sends NOTIFICATION message with
+         Error Code Cease and changes its state to Idle.
+
+         Start event is ignored in the OpenSent state.
+
+         In response to any other event the local system sends
+         NOTIFICATION message with Error Code Finite State Machine Error
+         and changes its state to Idle.
+
+         Whenever BGP changes its state from OpenSent to Idle, it closes
+         the BGP (and transport-level) connection and releases all
+         resources associated with that connection.
+
+      OpenConfirm state:
+
+         In this state BGP waits for a KEEPALIVE or NOTIFICATION
+         message.
+
+         If the local system receives a KEEPALIVE message, it changes
+         its state to Established.
+
+         If the Hold Timer expires before a KEEPALIVE message is
+         received, the local system sends NOTIFICATION message with
+         error code Hold Timer Expired and changes its state to Idle.
+
+         If the local system receives a NOTIFICATION message, it changes
+         its state to Idle.
+
+         If the KeepAlive timer expires, the local system sends a
+         KEEPALIVE message and restarts its KeepAlive timer.
+
+         If a disconnect notification is received from the underlying
+         transport protocol, the local system changes its state to Idle.
+
+         In response to the Stop event (initiated by either system or
+         operator) the local system sends NOTIFICATION message with
+         Error Code Cease and changes its state to Idle.
+
+         Start event is ignored in the OpenConfirm state.
+
+         In response to any other event the local system sends
+         NOTIFICATION message with Error Code Finite State Machine Error
+         and changes its state to Idle.
+
+
+
+Rekhter & Li                                                   [Page 32]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+         Whenever BGP changes its state from OpenConfirm to Idle, it
+         closes the BGP (and transport-level) connection and releases
+         all resources associated with that connection.
+
+      Established state:
+
+         In the Established state BGP can exchange UPDATE, NOTIFICATION,
+         and KEEPALIVE messages with its peer.
+
+         If the local system receives an UPDATE or KEEPALIVE message, it
+         restarts its Hold Timer, if the negotiated Hold Time value is
+         non-zero.
+
+         If the local system receives a NOTIFICATION message, it changes
+         its state to Idle.
+
+         If the local system receives an UPDATE message and the UPDATE
+         message error handling procedure (see Section 6.3) detects an
+         error, the local system sends a NOTIFICATION message and
+         changes its state to Idle.
+
+         If a disconnect notification is received from the underlying
+         transport protocol, the local system changes its state to Idle.
+
+         If the Hold Timer expires, the local system sends a
+         NOTIFICATION message with Error Code Hold Timer Expired and
+         changes its state to Idle.
+
+         If the KeepAlive timer expires, the local system sends a
+         KEEPALIVE message and restarts its KeepAlive timer.
+
+         Each time the local system sends a KEEPALIVE or UPDATE message,
+         it restarts its KeepAlive timer, unless the negotiated Hold
+         Time value is zero.
+
+         In response to the Stop event (initiated by either system or
+         operator), the local system sends a NOTIFICATION message with
+         Error Code Cease and changes its state to Idle.
+
+         Start event is ignored in the Established state.
+
+         In response to any other event, the local system sends
+         NOTIFICATION message with Error Code Finite State Machine Error
+         and changes its state to Idle.
+
+         Whenever BGP changes its state from Established to Idle, it
+         closes the BGP (and transport-level) connection, releases all
+         resources associated with that connection, and deletes all
+
+
+
+Rekhter & Li                                                   [Page 33]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+         routes derived from that connection.
+
+9.  UPDATE Message Handling
+
+   An UPDATE message may be received only in the Established state.
+   When an UPDATE message is received, each field is checked for
+   validity as specified in Section 6.3.
+
+   If an optional non-transitive attribute is unrecognized, it is
+   quietly ignored.  If an optional transitive attribute is
+   unrecognized, the Partial bit (the third high-order bit) in the
+   attribute flags octet is set to 1, and the attribute is retained for
+   propagation to other BGP speakers.
+
+   If an optional attribute is recognized, and has a valid value, then,
+   depending on the type of the optional attribute, it is processed
+   locally, retained, and updated, if necessary, for possible
+   propagation to other BGP speakers.
+
+   If the UPDATE message contains a non-empty WITHDRAWN ROUTES field,
+   the previously advertised routes whose destinations (expressed as IP
+   prefixes) contained in this field shall be removed from the Adj-RIB-
+   In.  This BGP speaker shall run its Decision Process since the
+   previously advertised route is no longer available for use.
+
+   If the UPDATE message contains a feasible route, it shall be placed
+   in the appropriate Adj-RIB-In, and the following additional actions
+   shall be taken:
+
+   i) If its Network Layer Reachability Information (NLRI) is identical
+   to the one of a route currently stored in the Adj-RIB-In, then the
+   new route shall replace the older route in the Adj-RIB-In, thus
+   implicitly withdrawing the older route from service. The BGP speaker
+   shall run its Decision Process since the older route is no longer
+   available for use.
+
+   ii) If the new route is an overlapping route that is included (see
+   9.1.4) in an earlier route contained in the Adj-RIB-In, the BGP
+   speaker shall run its Decision Process since the more specific route
+   has implicitly made a portion of the less specific route unavailable
+   for use.
+
+   iii) If the new route has identical path attributes to an earlier
+   route contained in the Adj-RIB-In, and is more specific (see 9.1.4)
+   than the earlier route, no further actions are necessary.
+
+   iv) If the new route has NLRI that is not present in any of the
+   routes currently stored in the Adj-RIB-In, then the new route shall
+
+
+
+Rekhter & Li                                                   [Page 34]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   be placed in the Adj-RIB-In. The BGP speaker shall run its Decision
+   Process.
+
+   v) If the new route is an overlapping route that is less specific
+   (see 9.1.4) than an earlier route contained in the Adj-RIB-In, the
+   BGP speaker shall run its Decision Process on the set of destinations
+   described only by the less specific route.
+
+9.1 Decision Process
+
+   The Decision Process selects routes for subsequent advertisement by
+   applying the policies in the local Policy Information Base (PIB) to
+   the routes stored in its Adj-RIB-In. The output of the Decision
+   Process is the set of routes that will be advertised to all peers;
+   the selected routes will be stored in the local speaker's Adj-RIB-
+   Out.
+
+   The selection process is formalized by defining a function that takes
+   the attribute of a given route as an argument and returns a non-
+   negative integer denoting the degree of preference for the route.
+   The function that calculates the degree of preference for a given
+   route shall not use as its inputs any of the following:  the
+   existence of other routes, the non-existence of other routes, or the
+   path attributes of other routes. Route selection then consists of
+   individual application of the degree of preference function to each
+   feasible route, followed by the choice of the one with the highest
+   degree of preference.
+
+   The Decision Process operates on routes contained in each Adj-RIB-In,
+   and is responsible for:
+
+      - selection of routes to be advertised to BGP speakers located in
+      the local speaker's autonomous system
+
+      - selection of routes to be advertised to BGP speakers located in
+      neighboring autonomous systems
+
+      - route aggregation and route information reduction
+
+   The Decision Process takes place in three distinct phases, each
+   triggered by a different event:
+
+      a) Phase 1 is responsible for calculating the degree of preference
+      for each route received from a BGP speaker located in a
+      neighboring autonomous system, and for advertising to the other
+      BGP speakers in the local autonomous system the routes that have
+      the highest degree of preference for each distinct destination.
+
+
+
+
+Rekhter & Li                                                   [Page 35]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+      b) Phase 2 is invoked on completion of phase 1. It is responsible
+      for choosing the best route out of all those available for each
+      distinct destination, and for installing each chosen route into
+      the appropriate Loc-RIB.
+
+      c) Phase 3 is invoked after the Loc-RIB has been modified. It is
+      responsible for disseminating routes in the Loc-RIB to each peer
+      located in a neighboring autonomous system, according to the
+      policies contained in the PIB. Route aggregation and information
+      reduction can optionally be performed within this phase.
+
+9.1.1 Phase 1: Calculation of Degree of Preference
+
+   The Phase 1 decision function shall be invoked whenever the local BGP
+   speaker receives an UPDATE message from a peer located in a
+   neighboring autonomous system that advertises a new route, a
+   replacement route, or a withdrawn route.
+
+   The Phase 1 decision function is a separate process which completes
+   when it has no further work to do.
+
+   The Phase 1 decision function shall lock an Adj-RIB-In prior to
+   operating on any route contained within it, and shall unlock it after
+   operating on all new or unfeasible routes contained within it.
+
+   For each newly received or replacement feasible route, the local BGP
+   speaker shall determine a degree of preference. If the route is
+   learned from a BGP speaker in the local autonomous system, either the
+   value of the LOCAL_PREF attribute shall be taken as the degree of
+   preference, or the local system shall compute the degree of
+   preference of the route based on preconfigured policy information. If
+   the route is learned from a BGP speaker in a neighboring autonomous
+   system, then the degree of preference shall be computed based on
+   preconfigured policy information.  The exact nature of this policy
+   information and the computation involved is a local matter.  The
+   local speaker shall then run the internal update process of 9.2.1 to
+   select and advertise the most preferable route.
+
+9.1.2 Phase 2: Route Selection
+
+   The Phase 2 decision function shall be invoked on completion of Phase
+   1.  The Phase 2 function is a separate process which completes when
+   it has no further work to do. The Phase 2 process shall consider all
+   routes that are present in the Adj-RIBs-In, including those received
+   from BGP speakers located in its own autonomous system and those
+   received from BGP speakers located in neighboring autonomous systems.
+
+
+
+
+
+Rekhter & Li                                                   [Page 36]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   The Phase 2 decision function shall be blocked from running while the
+   Phase 3 decision function is in process. The Phase 2 function shall
+   lock all Adj-RIBs-In prior to commencing its function, and shall
+   unlock them on completion.
+
+   If the NEXT_HOP attribute of a BGP route depicts an address to which
+   the local BGP speaker doesn't have a route in its Loc-RIB, the BGP
+   route SHOULD be excluded from the Phase 2 decision function.
+
+   For each set of destinations for which a feasible route exists in the
+   Adj-RIBs-In, the local BGP speaker shall identify the route that has:
+
+      a) the highest degree of preference of any route to the same set
+      of destinations, or
+
+      b) is the only route to that destination, or
+
+      c) is selected as a result of the Phase 2 tie breaking rules
+      specified in 9.1.2.1.
+
+   The local speaker SHALL then install that route in the Loc-RIB,
+   replacing any route to the same destination that is currently being
+   held in the Loc-RIB. The local speaker MUST determine the immediate
+   next hop to the address depicted by the NEXT_HOP attribute of the
+   selected route by performing a lookup in the IGP and selecting one of
+   the possible paths in the IGP.  This immediate next hop MUST be used
+   when installing the selected route in the Loc-RIB.  If the route to
+   the address depicted by the NEXT_HOP attribute changes such that the
+   immediate next hop changes, route selection should be recalculated as
+   specified above.
+
+   Unfeasible routes shall be removed from the Loc-RIB, and
+   corresponding unfeasible routes shall then be removed from the Adj-
+   RIBs-In.
+
+9.1.2.1 Breaking Ties (Phase 2)
+
+   In its Adj-RIBs-In a BGP speaker may have several routes to the same
+   destination that have the same degree of preference. The local
+   speaker can select only one of these routes for inclusion in the
+   associated Loc-RIB. The local speaker considers all equally
+   preferable routes, both those received from BGP speakers located in
+   neighboring autonomous systems, and those received from other BGP
+   speakers located in the local speaker's autonomous system.
+
+   The following tie-breaking procedure assumes that for each candidate
+   route all the BGP speakers within an autonomous system can ascertain
+   the cost of a path (interior distance) to the address depicted by the
+
+
+
+Rekhter & Li                                                   [Page 37]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   NEXT_HOP attribute of the route.  Ties shall be broken according to
+   the following algorithm:
+
+      a) If the local system is configured to take into account
+      MULTI_EXIT_DISC, and the candidate routes differ in their
+      MULTI_EXIT_DISC attribute, select the route that has the lowest
+      value of the MULTI_EXIT_DISC attribute.
+
+      b) Otherwise, select the route that has the lowest cost (interior
+      distance) to the entity depicted by the NEXT_HOP attribute of the
+      route.  If there are several routes with the same cost, then the
+      tie shall be broken as follows:
+
+         - if at least one of the candidate routes was advertised by the
+         BGP speaker in a neighboring autonomous system, select the
+         route that was advertised by the BGP speaker in a neighboring
+         autonomous system whose BGP Identifier has the lowest value
+         among all other BGP speakers in neighboring autonomous systems;
+
+         - otherwise, select the route that was advertised by the BGP
+         speaker whose BGP Identifier has the lowest value.
+
+9.1.3   Phase 3: Route Dissemination
+
+   The Phase 3 decision function shall be invoked on completion of Phase
+   2, or when any of the following events occur:
+
+      a) when routes in a Loc-RIB to local destinations have changed
+
+      b) when locally generated routes learned by means outside of BGP
+      have changed
+
+      c) when a new BGP speaker - BGP speaker connection has been
+      established
+
+   The Phase 3 function is a separate process which completes when it
+   has no further work to do. The Phase 3 Routing Decision function
+   shall be blocked from running while the Phase 2 decision function is
+   in process.
+
+   All routes in the Loc-RIB shall be processed into a corresponding
+   entry in the associated Adj-RIBs-Out. Route aggregation and
+   information reduction techniques (see 9.2.4.1) may optionally be
+   applied.
+
+   For the benefit of future support of inter-AS multicast capabilities,
+   a BGP speaker that participates in inter-AS multicast routing shall
+   advertise a route it receives from one of its external peers and if
+
+
+
+Rekhter & Li                                                   [Page 38]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   it installs it in its Loc-RIB, it shall advertise it back to the peer
+   from which the route was received. For a BGP speaker that does not
+   participate in inter-AS multicast routing such an advertisement is
+   optional. When doing such an advertisement, the NEXT_HOP attribute
+   should be set to the address of the peer. An implementation may also
+   optimize such an advertisement by truncating information in the
+   AS_PATH attribute to include only its own AS number and that of the
+   peer that advertised the route (such truncation requires the ORIGIN
+   attribute to be set to INCOMPLETE).  In addition an implementation is
+   not required to pass optional or discretionary path attributes with
+   such an advertisement.
+
+   When the updating of the Adj-RIBs-Out and the Forwarding Information
+   Base (FIB) is complete, the local BGP speaker shall run the external
+   update process of 9.2.2.
+
+9.1.4 Overlapping Routes
+
+   A BGP speaker may transmit routes with overlapping Network Layer
+   Reachability Information (NLRI) to another BGP speaker. NLRI overlap
+   occurs when a set of destinations are identified in non-matching
+   multiple routes. Since BGP encodes NLRI using IP prefixes, overlap
+   will always exhibit subset relationships.  A route describing a
+   smaller set of destinations (a longer prefix) is said to be more
+   specific than a route describing a larger set of destinations (a
+   shorter prefix); similarly, a route describing a larger set of
+   destinations (a shorter prefix) is said to be less specific than a
+   route describing a smaller set of destinations (a longer prefix).
+
+   The precedence relationship effectively decomposes less specific
+   routes into two parts:
+
+      -  a set of destinations described only by the less specific
+      route, and
+
+      -  a set of destinations described by the overlap of the less
+      specific and the more specific routes
+
+   When overlapping routes are present in the same Adj-RIB-In, the more
+   specific route shall take precedence, in order from most specific to
+   least specific.
+
+   The set of destinations described by the overlap represents a portion
+   of the less specific route that is feasible, but is not currently in
+   use.  If a more specific route is later withdrawn, the set of
+   destinations described by the overlap will still be reachable using
+   the less specific route.
+
+
+
+
+Rekhter & Li                                                   [Page 39]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   If a BGP speaker receives overlapping routes, the Decision Process
+   shall take into account the semantics of the overlapping routes. In
+   particular, if a BGP speaker accepts the less specific route while
+   rejecting the more specific route from the same peer, then the
+   destinations represented by the overlap may not forward along the ASs
+   listed in the AS_PATH attribute of that route. Therefore, a BGP
+   speaker has the following choices:
+
+      a)   Install both the less and the more specific routes
+
+      b)   Install the more specific route only
+
+      c)   Install the non-overlapping part of the less specific
+                 route only (that implies de-aggregation)
+
+      d)   Aggregate the two routes and install the aggregated route
+
+      e)   Install the less specific route only
+
+      f)   Install neither route
+
+   If a BGP speaker chooses e), then it should add ATOMIC_AGGREGATE
+   attribute to the route. A route that carries ATOMIC_AGGREGATE
+   attribute can not be de-aggregated. That is, the NLRI of this route
+   can not be made more specific.  Forwarding along such a route does
+   not guarantee that IP packets will actually traverse only ASs listed
+   in the AS_PATH attribute of the route.  If a BGP speaker chooses a),
+   it must not advertise the more general route without the more
+   specific route.
+
+9.2 Update-Send Process
+
+   The Update-Send process is responsible for advertising UPDATE
+   messages to all peers. For example, it distributes the routes chosen
+   by the Decision Process to other BGP speakers which may be located in
+   either the same autonomous system or a neighboring autonomous system.
+   Rules for information exchange between BGP speakers located in
+   different autonomous systems are given in 9.2.2; rules for
+   information exchange between BGP speakers located in the same
+   autonomous system are given in 9.2.1.
+
+   Distribution of routing information between a set of BGP speakers,
+   all of which are located in the same autonomous system, is referred
+   to as internal distribution.
+
+
+
+
+
+
+
+Rekhter & Li                                                   [Page 40]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+9.2.1 Internal Updates
+
+   The Internal update process is concerned with the distribution of
+   routing information to BGP speakers located in the local speaker's
+   autonomous system.
+
+   When a BGP speaker receives an UPDATE message from another BGP
+   speaker located in its own autonomous system, the receiving BGP
+   speaker shall not re-distribute the routing information contained in
+   that UPDATE message to other BGP speakers located in its own
+   autonomous system.
+
+   When a BGP speaker receives a new route from a BGP speaker in a
+   neighboring autonomous system, it shall advertise that route to all
+   other BGP speakers in its autonomous system by means of an UPDATE
+   message if any of the following conditions occur:
+
+      1) the degree of preference assigned to the newly received route
+      by the local BGP speaker is higher than the degree of preference
+      that the local speaker has assigned to other routes that have been
+      received from BGP speakers in neighboring autonomous systems, or
+
+      2) there are no other routes that have been received from BGP
+      speakers in neighboring autonomous systems, or
+
+      3) the newly received route is selected as a result of breaking a
+      tie between several routes which have the highest degree of
+      preference, and the same destination (the tie-breaking procedure
+      is specified in 9.2.1.1).
+
+   When a BGP speaker receives an UPDATE message with a non-empty
+   WITHDRAWN ROUTES field, it shall remove from its Adj-RIB-In all
+   routes whose destinations were carried in this field (as IP prefixes).
+   The speaker shall take the following additional steps:
+
+      1) if the corresponding feasible route had not been previously
+      advertised, then no further action is necessary
+
+      2) if the corresponding feasible route had been previously
+      advertised, then:
+
+         i) if a new route is selected for advertisement that has the
+         same Network Layer Reachability Information as the unfeasible
+         routes, then the local BGP speaker shall advertise the
+         replacement route
+
+         ii) if a replacement route is not available for advertisement,
+         then the BGP speaker shall include the destinations  of the
+
+
+
+Rekhter & Li                                                   [Page 41]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+         unfeasible route (in form of IP prefixes) in the WITHDRAWN
+         ROUTES field of an UPDATE message, and shall send this message
+         to each peer to whom it had previously advertised the
+         corresponding feasible route.
+
+   All feasible routes which are advertised shall be placed in the
+   appropriate Adj-RIBs-Out, and all unfeasible routes which are
+   advertised shall be removed from the Adj-RIBs-Out.
+
+9.2.1.1 Breaking Ties (Internal Updates)
+
+   If a local BGP speaker has connections to several BGP speakers in
+   neighboring autonomous systems, there will be multiple Adj-RIBs-In
+   associated with these peers. These Adj-RIBs-In might contain several
+   equally preferable routes to the same destination, all of which were
+   advertised by BGP speakers located in neighboring autonomous systems.
+   The local BGP speaker shall select one of these routes according to
+   the following rules:
+
+      a) If the candidate routes differ only in their NEXT_HOP and
+      MULTI_EXIT_DISC attributes, and the local system is configured to
+      take into account the MULTI_EXIT_DISC attribute, select the route
+      that has the lowest value of the MULTI_EXIT_DISC attribute.
+
+      b) If the local system can ascertain the cost of a path to the
+      entity depicted by the NEXT_HOP attribute of the candidate route,
+      select the route with the lowest cost.
+
+      c) In all other cases, select the route that was advertised by the
+      BGP speaker whose BGP Identifier has the lowest value.
+
+9.2.2 External Updates
+
+   The external update process is concerned with the distribution of
+   routing information to BGP speakers located in neighboring autonomous
+   systems. As part of Phase 3 route selection process, the BGP speaker
+   has updated its Adj-RIBs-Out and its Forwarding Table. All newly
+   installed routes and all newly unfeasible routes for which there is
+   no replacement route shall be advertised to BGP speakers located in
+   neighboring autonomous systems by means of UPDATE message.
+
+   Any routes in the Loc-RIB marked as unfeasible shall be removed.
+   Changes to the reachable destinations within its own autonomous
+   system shall also be advertised in an UPDATE message.
+
+
+
+
+
+
+
+Rekhter & Li                                                   [Page 42]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+9.2.3 Controlling Routing Traffic Overhead
+
+   The BGP protocol constrains the amount of routing traffic (that is,
+   UPDATE messages) in order to limit both the link bandwidth needed to
+   advertise UPDATE messages and the processing power needed by the
+   Decision Process to digest the information contained in the UPDATE
+   messages.
+
+9.2.3.1 Frequency of Route Advertisement
+
+   The parameter MinRouteAdvertisementInterval determines the minimum
+   amount of time that must elapse between advertisement of routes to a
+   particular destination from a single BGP speaker. This rate limiting
+   procedure applies on a per-destination basis, although the value of
+   MinRouteAdvertisementInterval is set on a per BGP peer basis.
+
+   Two UPDATE messages sent from a single BGP speaker that advertise
+   feasible routes to some common set of destinations received from BGP
+   speakers in neighboring autonomous systems must be separated by at
+   least MinRouteAdvertisementInterval. Clearly, this can only be
+   achieved precisely by keeping a separate timer for each common set of
+   destinations. This would be unwarranted overhead. Any technique which
+   ensures that the interval between two UPDATE messages sent from a
+   single BGP speaker that advertise feasible routes to some common set
+   of destinations received from BGP speakers in neighboring autonomous
+   systems will be at least MinRouteAdvertisementInterval, and will also
+   ensure a constant upper bound on the interval is acceptable.
+
+   Since fast convergence is needed within an autonomous system, this
+   procedure does not apply for routes received from other BGP speakers
+   in the same autonomous system. To avoid long-lived black holes, the
+   procedure does not apply to the explicit withdrawal of unfeasible
+   routes (that is, routes whose destinations (expressed as IP prefixes)
+   are listed in the WITHDRAWN ROUTES field of an UPDATE message).
+
+   This procedure does not limit the rate of route selection, but only
+   the rate of route advertisement. If new routes are selected multiple
+   times while awaiting the expiration of MinRouteAdvertisementInterval,
+   the last route selected shall be advertised at the end of
+   MinRouteAdvertisementInterval.
+
+9.2.3.2 Frequency of Route Origination
+
+   The parameter MinASOriginationInterval determines the minimum amount
+   of time that must elapse between successive advertisements of UPDATE
+   messages that report changes within the advertising BGP speaker's own
+   autonomous systems.
+
+
+
+
+Rekhter & Li                                                   [Page 43]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+9.2.3.3 Jitter
+
+   To minimize the likelihood that the distribution of BGP messages by a
+   given BGP speaker will contain peaks, jitter should be applied to the
+   timers associated with MinASOriginationInterval, Keepalive, and
+   MinRouteAdvertisementInterval. A given BGP speaker shall apply the
+   same jitter to each of these quantities regardless of the
+   destinations to which the updates are being sent; that is, jitter
+   will not be applied on a "per peer" basis.
+
+   The amount of jitter to be introduced shall be determined by
+   multiplying the base value of the appropriate timer by a random
+   factor which is uniformly distributed in the range from 0.75 to 1.0.
+
+9.2.4 Efficient Organization of Routing Information
+
+   Having selected the routing information which it will advertise, a
+   BGP speaker may avail itself of several methods to organize this
+   information in an efficient manner.
+
+9.2.4.1 Information Reduction
+
+   Information reduction may imply a reduction in granularity of policy
+   control - after information is collapsed, the same policies will
+   apply to all destinations and paths in the equivalence class.
+
+   The Decision Process may optionally reduce the amount of information
+   that it will place in the Adj-RIBs-Out by any of the following
+   methods:
+
+      a)   Network Layer Reachability Information (NLRI):
+
+      Destination IP addresses can be represented as IP address
+      prefixes.  In cases where there is a correspondence between the
+      address structure and the systems under control of an autonomous
+      system administrator, it will be possible to reduce the size of
+      the NLRI carried in the UPDATE messages.
+
+      b)   AS_PATHs:
+
+      AS path information can be represented as ordered AS_SEQUENCEs or
+      unordered AS_SETs. AS_SETs are used in the route aggregation
+      algorithm described in 9.2.4.2. They reduce the size of the
+      AS_PATH information by listing each AS number only once,
+      regardless of how many times it may have appeared in multiple
+      AS_PATHs that were aggregated.
+
+
+
+
+
+Rekhter & Li                                                   [Page 44]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+      An AS_SET implies that the destinations listed in the NLRI can be
+      reached through paths that traverse at least some of the
+      constituent autonomous systems. AS_SETs provide sufficient
+      information to avoid routing information looping; however their
+      use may prune potentially feasible paths, since such paths are no
+      longer listed individually in the form of AS_SEQUENCEs.  In
+      practice this is not likely to be a problem, since once an IP
+      packet arrives at the edge of a group of autonomous systems, the
+      BGP speaker at that point is likely to have more detailed path
+      information and can distinguish individual paths to destinations.
+
+9.2.4.2 Aggregating Routing Information
+
+   Aggregation is the process of combining the characteristics of
+   several different routes in such a way that a single route can be
+   advertised.  Aggregation can occur as part of the decision  process
+   to reduce the amount of routing information that will be placed in
+   the Adj-RIBs-Out.
+
+   Aggregation reduces the amount of information that a BGP speaker must
+   store and exchange with other BGP speakers. Routes can be aggregated
+   by applying the following procedure separately to path attributes of
+   like type and to the Network Layer Reachability Information.
+
+   Routes that have the following attributes shall not be aggregated
+   unless the corresponding attributes of each route are identical:
+   MULTI_EXIT_DISC, NEXT_HOP.
+
+   Path attributes that have different type codes can not be aggregated
+   together. Path attributes of the same type code may be aggregated,
+   according to the following rules:
+
+      ORIGIN attribute: If at least one route among routes that are
+      aggregated has ORIGIN with the value INCOMPLETE, then the
+      aggregated route must have the ORIGIN attribute with the value
+      INCOMPLETE. Otherwise, if at least one route among routes that are
+      aggregated has ORIGIN with the value EGP, then the aggregated
+      route must have the ORIGIN attribute with the value EGP. In all
+      other cases the value of the ORIGIN attribute of the aggregated
+      route is INTERNAL.
+
+      AS_PATH attribute: If routes to be aggregated have identical
+      AS_PATH attributes, then the aggregated route has the same AS_PATH
+      attribute as each individual route.
+
+      For the purpose of aggregating AS_PATH attributes we model each AS
+      within the AS_PATH attribute as a tuple <type, value>, where
+      "type" identifies a type of the path segment the AS belongs to
+
+
+
+Rekhter & Li                                                   [Page 45]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+      (e.g. AS_SEQUENCE, AS_SET), and "value" is the AS number.  If the
+      routes to be aggregated have different AS_PATH attributes, then
+      the aggregated AS_PATH attribute shall satisfy all of the
+      following conditions:
+
+         - all tuples of the type AS_SEQUENCE in the aggregated AS_PATH
+         shall appear in all of the AS_PATH in the initial set of routes
+         to be aggregated.
+
+         - all tuples of the type AS_SET in the aggregated AS_PATH shall
+         appear in at least one of the AS_PATH in the initial set (they
+         may appear as either AS_SET or AS_SEQUENCE types).
+
+         - for any tuple X of the type AS_SEQUENCE in the aggregated
+         AS_PATH which precedes tuple Y in the aggregated AS_PATH, X
+         precedes Y in each AS_PATH in the initial set which contains Y,
+         regardless of the type of Y.
+
+         - No tuple with the same value shall appear more than once in
+         the aggregated AS_PATH, regardless of the tuple's type.
+
+      An implementation may choose any algorithm which conforms to these
+      rules.  At a minimum a conformant implementation shall be able to
+      perform the following algorithm that meets all of the above
+      conditions:
+
+         - determine the longest leading sequence of tuples (as defined
+         above) common to all the AS_PATH attributes of the routes to be
+         aggregated. Make this sequence the leading sequence of the
+         aggregated AS_PATH attribute.
+
+         - set the type of the rest of the tuples from the AS_PATH
+         attributes of the routes to be aggregated to AS_SET, and append
+         them to the aggregated AS_PATH attribute.
+
+         - if the aggregated AS_PATH has more than one tuple with the
+         same value (regardless of tuple's type), eliminate all but one
+         such tuple by deleting tuples of the type AS_SET from the
+         aggregated AS_PATH attribute.
+
+      Appendix 6, section 6.8 presents another algorithm that satisfies
+      the conditions and  allows for more complex policy configurations.
+
+      ATOMIC_AGGREGATE: If at least one of the routes to be aggregated
+      has ATOMIC_AGGREGATE path attribute, then the aggregated route
+      shall have this attribute as well.
+
+
+
+
+
+Rekhter & Li                                                   [Page 46]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+      AGGREGATOR: All AGGREGATOR attributes of all routes to be
+      aggregated should be ignored.
+
+9.3   Route Selection Criteria
+
+   Generally speaking, additional rules for comparing routes among
+   several alternatives are outside the scope of this document.  There
+   are two exceptions:
+
+      - If the local AS appears in the AS path of the new route being
+      considered, then that new route cannot be viewed as better than
+      any other route.  If such a route were ever used, a routing loop
+      would result.
+
+      - In order to achieve successful distributed operation, only
+      routes with a likelihood of stability can be chosen.  Thus, an AS
+      must avoid using unstable routes, and it must not make rapid
+      spontaneous changes to its choice of route.  Quantifying the terms
+      "unstable" and "rapid" in the previous sentence will require
+      experience, but the principle is clear.
+
+9.4   Originating BGP routes
+
+   A BGP speaker may originate BGP routes by injecting routing
+   information acquired by some other means (e.g. via an IGP) into BGP.
+   A BGP speaker that originates BGP routes shall assign the degree of
+   preference to these routes by passing them through the Decision
+   Process (see Section 9.1).  These routes may also be distributed to
+   other BGP speakers within the local AS as part of the Internal update
+   process (see Section 9.2.1). The decision whether to distribute non-
+   BGP acquired routes within an AS via BGP or not depends on the
+   environment within the AS (e.g. type of IGP) and should be controlled
+   via configuration.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Rekhter & Li                                                   [Page 47]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+Appendix 1.  BGP FSM State Transitions and Actions.
+
+   This Appendix discusses the transitions between states in the BGP FSM
+   in response to BGP events.  The following is the list of these states
+   and events when the negotiated Hold Time value is non-zero.
+
+       BGP States:
+
+                1 - Idle
+                2 - Connect
+                3 - Active
+                4 - OpenSent
+                5 - OpenConfirm
+                6 - Established
+
+       BGP Events:
+
+                1 - BGP Start
+                2 - BGP Stop
+                3 - BGP Transport connection open
+                4 - BGP Transport connection closed
+                5 - BGP Transport connection open failed
+                6 - BGP Transport fatal error
+                7 - ConnectRetry timer expired
+                8 - Hold Timer expired
+                9 - KeepAlive timer expired
+               10 - Receive OPEN message
+               11 - Receive KEEPALIVE message
+               12 - Receive UPDATE messages
+               13 - Receive NOTIFICATION message
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Rekhter & Li                                                   [Page 48]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   The following table describes the state transitions of the BGP FSM
+   and the actions triggered by these transitions.
+
+
+    Event                Actions               Message Sent   Next State
+    --------------------------------------------------------------------
+    Idle (1)
+     1            Initialize resources            none             2
+                  Start ConnectRetry timer
+                  Initiate a transport connection
+     others               none                    none             1
+
+    Connect(2)
+     1                    none                    none             2
+     3            Complete initialization         OPEN             4
+                  Clear ConnectRetry timer
+     5            Restart ConnectRetry timer      none             3
+     7            Restart ConnectRetry timer      none             2
+                  Initiate a transport connection
+     others       Release resources               none             1
+
+    Active (3)
+     1                    none                    none             3
+     3            Complete initialization         OPEN             4
+                  Clear ConnectRetry timer
+     5            Close connection                                 3
+                  Restart ConnectRetry timer
+     7            Restart ConnectRetry timer      none             2
+                  Initiate a transport connection
+     others       Release resources               none             1
+
+    OpenSent(4)
+     1                    none                    none             4
+     4            Close transport connection      none             3
+                  Restart ConnectRetry timer
+     6            Release resources               none             1
+    10            Process OPEN is OK            KEEPALIVE          5
+                  Process OPEN failed           NOTIFICATION       1
+    others        Close transport connection    NOTIFICATION       1
+                  Release resources
+
+
+
+
+
+
+
+
+
+
+
+Rekhter & Li                                                   [Page 49]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+    OpenConfirm (5)
+     1                   none                     none             5
+     4            Release resources               none             1
+     6            Release resources               none             1
+     9            Restart KeepAlive timer       KEEPALIVE          5
+    11            Complete initialization         none             6
+                  Restart Hold Timer
+    13            Close transport connection                       1
+                  Release resources
+    others        Close transport connection    NOTIFICATION       1
+                  Release resources
+
+    Established (6)
+     1                   none                     none             6
+     4            Release resources               none             1
+     6            Release resources               none             1
+     9            Restart KeepAlive timer       KEEPALIVE          6
+    11            Restart Hold Timer            KEEPALIVE          6
+    12            Process UPDATE is OK          UPDATE             6
+                  Process UPDATE failed         NOTIFICATION       1
+    13            Close transport connection                       1
+                  Release resources
+    others        Close transport connection    NOTIFICATION       1
+                  Release resources
+   ---------------------------------------------------------------------
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Rekhter & Li                                                   [Page 50]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+      The following is a condensed version of the above state transition
+      table.
+
+
+   Events| Idle | Connect | Active | OpenSent | OpenConfirm | Estab
+         | (1)  |   (2)   |  (3)   |    (4)   |     (5)     |   (6)
+         |--------------------------------------------------------------
+    1    |  2   |    2    |   3    |     4    |      5      |    6
+         |      |         |        |          |             |
+    2    |  1   |    1    |   1    |     1    |      1      |    1
+         |      |         |        |          |             |
+    3    |  1   |    4    |   4    |     1    |      1      |    1
+         |      |         |        |          |             |
+    4    |  1   |    1    |   1    |     3    |      1      |    1
+         |      |         |        |          |             |
+    5    |  1   |    3    |   3    |     1    |      1      |    1
+         |      |         |        |          |             |
+    6    |  1   |    1    |   1    |     1    |      1      |    1
+         |      |         |        |          |             |
+    7    |  1   |    2    |   2    |     1    |      1      |    1
+         |      |         |        |          |             |
+    8    |  1   |    1    |   1    |     1    |      1      |    1
+         |      |         |        |          |             |
+    9    |  1   |    1    |   1    |     1    |      5      |    6
+         |      |         |        |          |             |
+   10    |  1   |    1    |   1    |  1 or 5  |      1      |    1
+         |      |         |        |          |             |
+   11    |  1   |    1    |   1    |     1    |      6      |    6
+         |      |         |        |          |             |
+   12    |  1   |    1    |   1    |     1    |      1      | 1 or 6
+         |      |         |        |          |             |
+   13    |  1   |    1    |   1    |     1    |      1      |    1
+         |      |         |        |          |             |
+         ---------------------------------------------------------------
+
+
+Appendix 2. Comparison with RFC1267
+
+   BGP-4 is capable of operating in an environment where a set of
+   reachable destinations may be expressed via a single IP prefix.  The
+   concept of network classes, or subnetting is foreign to BGP-4.  To
+   accommodate these capabilities BGP-4 changes semantics and encoding
+   associated with the AS_PATH attribute. New text has been added to
+   define semantics associated with IP prefixes.  These abilities allow
+   BGP-4 to support the proposed supernetting scheme [9].
+
+   To simplify configuration this version introduces a new attribute,
+   LOCAL_PREF, that facilitates route selection procedures.
+
+
+
+Rekhter & Li                                                   [Page 51]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   The INTER_AS_METRIC attribute has been renamed to be MULTI_EXIT_DISC.
+   A new attribute, ATOMIC_AGGREGATE, has been introduced to ensure that
+   certain aggregates are not de-aggregated.  Another new attribute,
+   AGGREGATOR, can be added to aggregate routes in order to advertise
+   which AS and which BGP speaker within that AS caused the aggregation.
+
+   To ensure that Hold Timers are symmetric, the Hold Time is now
+   negotiated on a per-connection basis.  Hold Times of zero are now
+   supported.
+
+Appendix 3.  Comparison with RFC 1163
+
+   All of the changes listed in Appendix 2, plus the following.
+
+   To detect and recover from BGP connection collision, a new field (BGP
+   Identifier) has been added to the OPEN message. New text (Section
+   6.8) has been added to specify the procedure for detecting and
+   recovering from collision.
+
+   The new document no longer restricts the border router that is passed
+   in the NEXT_HOP path attribute to be part of the same Autonomous
+   System as the BGP Speaker.
+
+   The new document optimizes and simplifies the exchange of the
+   information about previously reachable routes.
+
+Appendix 4.  Comparison with RFC 1105
+
+   All of the changes listed in Appendices 2 and 3, plus the following.
+
+   Minor changes to the RFC1105 Finite State Machine were necessary to
+   accommodate the TCP user interface provided by 4.3 BSD.
+
+   The notion of Up/Down/Horizontal relations present in RFC1105 has
+   been removed from the protocol.
+
+   The changes in the message format from RFC1105 are as follows:
+
+      1.  The Hold Time field has been removed from the BGP header and
+      added to the OPEN message.
+
+      2.  The version field has been removed from the BGP header and
+      added to the OPEN message.
+
+      3.  The Link Type field has been removed from the OPEN message.
+
+      4.  The OPEN CONFIRM message has been eliminated and replaced with
+      implicit confirmation provided by the KEEPALIVE message.
+
+
+
+Rekhter & Li                                                   [Page 52]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+      5.  The format of the UPDATE message has been changed
+      significantly.  New fields were added to the UPDATE message to
+      support multiple path attributes.
+
+      6.  The Marker field has been expanded and its role broadened to
+      support authentication.
+
+      Note that quite often BGP, as specified in RFC 1105, is referred
+      to as BGP-1; BGP, as specified in RFC 1163, is referred to as
+      BGP-2; BGP, as specified in RFC1267, is referred to as BGP-3; and
+      BGP, as specified in this document, is referred to as BGP-4.
+
+Appendix 5.  TCP options that may be used with BGP
+
+   If a local system TCP user interface supports TCP PUSH function, then
+   each BGP message should be transmitted with PUSH flag set.  Setting
+   PUSH flag forces BGP messages to be transmitted promptly to the
+   receiver.
+
+   If a local system TCP user interface supports setting precedence for
+   TCP connection, then the BGP transport connection should be opened
+   with precedence set to Internetwork Control (110) value (see also
+   [6]).
+
+Appendix 6.  Implementation Recommendations
+
+   This section presents some implementation recommendations.
+
+6.1 Multiple Networks Per Message
+
+   The BGP protocol allows for multiple address prefixes with the same
+   AS path and next-hop gateway to be specified in one message. Making
+   use of this capability is highly recommended. With one address prefix
+   per message there is a substantial increase in overhead in the
+   receiver. Not only does the system overhead increase due to the
+   reception of multiple messages, but the overhead of scanning the
+   routing table for updates to BGP peers and other routing protocols
+   (and sending the associated messages) is incurred multiple times as
+   well. One method of building messages containing many address
+   prefixes per AS path and gateway from a routing table that is not
+   organized per AS path is to build many messages as the routing table
+   is scanned. As each address prefix is processed, a message for the
+   associated AS path and gateway is allocated, if it does not exist,
+   and the new address prefix is added to it.  If such a message exists,
+   the new address prefix is just appended to it. If the message lacks
+   the space to hold the new address prefix, it is transmitted, a new
+   message is allocated, and the new address prefix is inserted into the
+   new message. When the entire routing table has been scanned, all
+
+
+
+Rekhter & Li                                                   [Page 53]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   allocated messages are sent and their resources released.  Maximum
+   compression is achieved when all the destinations covered by the
+   address prefixes share a gateway and common path attributes, making
+   it possible to send many address prefixes in one 4096-byte message.
+
+   When peering with a BGP implementation that does not compress
+   multiple address prefixes into one message, it may be necessary to
+   take steps to reduce the overhead from the flood of data received
+   when a peer is acquired or a significant network topology change
+   occurs. One method of doing this is to limit the rate of updates.
+   This will eliminate the redundant scanning of the routing table to
+   provide flash updates for BGP peers and other routing protocols. A
+   disadvantage of this approach is that it increases the propagation
+   latency of routing information.  By choosing a minimum flash update
+   interval that is not much greater than the time it takes to process
+   the multiple messages this latency should be minimized. A better
+   method would be to read all received messages before sending updates.
+
+6.2  Processing Messages on a Stream Protocol
+
+   BGP uses TCP as a transport mechanism.  Due to the stream nature of
+   TCP, all the data for received messages does not necessarily arrive
+   at the same time. This can make it difficult to process the data as
+   messages, especially on systems such as BSD Unix where it is not
+   possible to determine how much data has been received but not yet
+   processed.
+
+   One method that can be used in this situation is to first try to read
+   just the message header. For the KEEPALIVE message type, this is a
+   complete message; for other message types, the header should first be
+   verified, in particular the total length. If all checks are
+   successful, the specified length, minus the size of the message
+   header, is the amount of data left to read. An implementation that
+   would "hang" the routing information process while trying to read
+   from a peer could set up a message buffer (4096 bytes) per peer and
+   fill it with data as available until a complete message has been
+   received.
+
+6.3 Reducing route flapping
+
+   To avoid excessive route flapping a BGP speaker which needs to
+   withdraw a destination and send an update about a more specific or
+   less specific route shall combine them into the same UPDATE message.
+
+
+
+
+
+
+
+
+Rekhter & Li                                                   [Page 54]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+6.4 BGP Timers
+
+   BGP employs five timers: ConnectRetry, Hold Time, KeepAlive,
+   MinASOriginationInterval, and MinRouteAdvertisementInterval.  The
+   suggested value for the ConnectRetry timer is 120 seconds.  The
+   suggested value for the Hold Time is 90 seconds.  The suggested value
+   for the KeepAlive timer is 30 seconds.  The suggested value for the
+   MinASOriginationInterval is 15 seconds.  The suggested value for the
+   MinRouteAdvertisementInterval is 30 seconds.
+
+   An implementation of BGP MUST allow these timers to be configurable.
+
+6.5 Path attribute ordering
+
+   Implementations which combine update messages as described above in
+   6.1 may prefer to see all path attributes presented in a known order.
+   This permits them to quickly identify sets of attributes from
+   different update messages which are semantically identical.  To
+   facilitate this, it is a useful optimization to order the path
+   attributes according to type code.  This optimization is entirely
+   optional.
+
+6.6 AS_SET sorting
+
+   Another useful optimization that can be done to simplify this
+   situation is to sort the AS numbers found in an AS_SET.  This
+   optimization is entirely optional.
+
+6.7 Control over version negotiation
+
+   Since BGP-4 is capable of carrying aggregated routes which cannot be
+   properly represented in BGP-3, an implementation which supports BGP-4
+   and another BGP version should provide the capability to only speak
+   BGP-4 on a per-peer basis.
+
+6.8 Complex AS_PATH aggregation
+
+   An implementation which chooses to provide a path aggregation
+   algorithm which retains significant amounts of path information may
+   wish to use the following procedure:
+
+      For the purpose of aggregating AS_PATH attributes of two routes,
+      we model each AS as a tuple <type, value>, where "type" identifies
+      a type of the path segment the AS belongs to (e.g.  AS_SEQUENCE,
+      AS_SET), and "value" is the AS number.  Two ASs are said to be the
+      same if their corresponding <type, value> tuples are the same.
+
+
+
+
+
+Rekhter & Li                                                   [Page 55]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+      The algorithm to aggregate two AS_PATH attributes works as
+      follows:
+
+         a) Identify the same ASs (as defined above) within each AS_PATH
+         attribute that are in the same relative order within both
+         AS_PATH attributes.  Two ASs, X and Y, are said to be in the
+         same order if either:
+
+            - X precedes Y in both AS_PATH attributes, or
+            - Y precedes X in both AS_PATH attributes.
+
+         b) The aggregated AS_PATH attribute consists of ASs identified
+         in (a) in exactly the same order as they appear in the AS_PATH
+         attributes to be aggregated. If two consecutive ASs identified
+         in (a) do not immediately follow each other in both of the
+         AS_PATH attributes to be aggregated, then the intervening ASs
+         (ASs that are between the two consecutive ASs that are the
+         same) in both attributes are combined into an AS_SET path
+         segment that consists of the intervening ASs from both AS_PATH
+         attributes; this segment is then placed in between the two
+         consecutive ASs identified in (a) of the aggregated attribute.
+         If two consecutive ASs identified in (a) immediately follow
+         each other in one attribute, but do not follow in another, then
+         the intervening ASs of the latter are combined into an AS_SET
+         path segment; this segment is then placed in between the two
+         consecutive ASs identified in (a) of the aggregated attribute.
+
+      If as a result of the above procedure a given AS number appears
+      more than once within the aggregated AS_PATH attribute, all, but
+      the last instance (rightmost occurrence) of that AS number should
+      be removed from the aggregated AS_PATH attribute.
+
+References
+
+   [1] Mills, D., "Exterior Gateway Protocol Formal Specification", RFC
+       904, BBN, April 1984.
+
+   [2] Rekhter, Y., "EGP and Policy Based Routing in the New NSFNET
+       Backbone", RFC 1092, T.J. Watson Research Center, February 1989.
+
+   [3] Braun, H-W., "The NSFNET Routing Architecture", RFC 1093,
+       MERIT/NSFNET Project, February 1989.
+
+   [4] Postel, J., "Transmission Control Protocol - DARPA Internet
+       Program Protocol Specification", STD 7, RFC 793, DARPA, September
+       1981.
+
+
+
+
+
+Rekhter & Li                                                   [Page 56]
+
+RFC 1771                         BGP-4                        March 1995
+
+
+   [5] Rekhter, Y., and P. Gross, "Application of the Border Gateway
+       Protocol in the Internet", RFC 1772, T.J. Watson Research Center,
+       IBM Corp., MCI, March 1995.
+
+   [6] Postel, J., "Internet Protocol - DARPA Internet Program Protocol
+       Specification", STD 5, RFC 791, DARPA, September 1981.
+
+   [7] "Information Processing Systems - Telecommunications and
+       Information Exchange between Systems - Protocol for Exchange of
+       Inter-domain Routeing Information among Intermediate Systems to
+       Support Forwarding of ISO 8473 PDUs", ISO/IEC IS10747, 1993.
+
+   [8] Fuller, V., Li, T., Yu, J., and K. Varadhan, "Classless Inter-
+       Domain Routing (CIDR): an Address Assignment and Aggregation
+       Strategy", RFC 1519, BARRNet, cisco, MERIT, OARnet, September
+       1993.
+
+   [9] Rekhter, Y., Li, T., "An Architecture for IP Address Allocation
+       with CIDR", RFC 1518, T.J. Watson Research Center, cisco,
+       September 1993.
+
+Security Considerations
+
+   Security issues are not discussed in this document.
+
+Editors' Addresses
+
+   Yakov Rekhter
+   T.J. Watson Research Center IBM Corporation
+   P.O. Box 704, Office H3-D40
+   Yorktown Heights, NY 10598
+
+   Phone:  +1 914 784 7361
+   EMail:  yakov@watson.ibm.com
+
+
+   Tony Li
+   cisco Systems, Inc.
+   170 W. Tasman Dr.
+   San Jose, CA 95134
+
+   EMail: tli@cisco.com
+
+
+
+
+
+
+
+
+
+Rekhter & Li                                                   [Page 57]
+
diff --git a/doc/rfc4360.txt b/doc/rfc4360.txt
@@ -0,0 +1,675 @@
+
+
+
+
+
+
+Network Working Group                                          S. Sangli
+Request for Comments: 4360                                     D. Tappan
+Category: Standards Track                                  Cisco Systems
+                                                              Y. Rekhter
+                                                        Juniper Networks
+                                                           February 2006
+
+
+                   BGP Extended Communities Attribute
+
+Status of This Memo
+
+   This document specifies an Internet standards track protocol for the
+   Internet community, and requests discussion and suggestions for
+   improvements.  Please refer to the current edition of the "Internet
+   Official Protocol Standards" (STD 1) for the standardization state
+   and status of this protocol.  Distribution of this memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (2006).
+
+Abstract
+
+   This document describes the "extended community" BGP-4 attribute.
+   This attribute provides a mechanism for labeling information carried
+   in BGP-4.  These labels can be used to control the distribution of
+   this information, or for other applications.
+
+1.  Introduction
+
+   The Extended Community Attribute provides a mechanism for labeling
+   information carried in BGP-4 [BGP-4].  It provides two important
+   enhancements over the existing BGP Community Attribute [RFC1997]:
+
+      - An extended range, ensuring that communities can be assigned for
+        a plethora of uses, without fear of overlap.
+
+      - The addition of a Type field provides structure for the
+        community space.
+
+   The addition of structure allows the usage of policy based on the
+   application for which the community value will be used.  For example,
+   one can filter out all communities of a particular type, or allow
+   only certain values for a particular type of community.  It also
+   allows one to specify whether a particular community is transitive or
+   non-transitive across an Autonomous System (AS) boundary.  Without
+   structure, this can only be accomplished by explicitly enumerating
+
+
+
+Sangli, et al.              Standards Track                     [Page 1]
+
+RFC 4360           BGP Extended Communities Attribute      February 2006
+
+
+   all community values that will be denied or allowed and passed to BGP
+   speakers in neighboring ASes based on the transitive property.
+
+1.1.  Specification of Requirements
+
+   The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+   "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+   document are to be interpreted as described in RFC 2119 [RFC2119].
+
+2.  BGP Extended Communities Attribute
+
+   The Extended Communities Attribute is a transitive optional BGP
+   attribute, with the Type Code 16.  The attribute consists of a set of
+   "extended communities".  All routes with the Extended Communities
+   attribute belong to the communities listed in the attribute.
+
+   Each Extended Community is encoded as an 8-octet quantity, as
+   follows:
+
+      - Type Field  : 1 or 2 octets
+      - Value Field : Remaining octets
+
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Type high    |  Type low(*)  |                               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+          Value                |
+      |                                                               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+      (*) Present for Extended types only, used for the Value field
+          otherwise.
+
+      Type Field:
+
+         Two classes of Type Field are introduced: Regular type and
+         Extended type.
+
+         The size of Type Field for Regular types is 1 octet, and the
+         size of the Type Field for Extended types is 2 octets.
+
+         The value of the high-order octet of the Type Field determines
+         if an extended community is a Regular type or an Extended type.
+         The class of a type (Regular or Extended) is not encoded in the
+         structure of the type itself.  The class of a type is specified
+         in the document that defines the type and the IANA registry.
+
+
+
+
+
+Sangli, et al.              Standards Track                     [Page 2]
+
+RFC 4360           BGP Extended Communities Attribute      February 2006
+
+
+         The high-order octet of the Type Field is as shown below:
+
+             0 1 2 3 4 5 6 7
+            +-+-+-+-+-+-+-+-+
+            |I|T|           |
+            +-+-+-+-+-+-+-+-+
+
+            I - IANA authority bit
+
+               Value 0: IANA-assignable type using the "First Come First
+               Serve" policy
+
+               Value 1: Part of this Type Field space is for IANA
+               assignable types using either the Standard Action or the
+               Early IANA Allocation policy.  The rest of this Type
+               Field space is for Experimental use.
+
+            T - Transitive bit
+
+               Value 0: The community is transitive across ASes
+
+               Value 1: The community is non-transitive across ASes
+
+            Remaining 6 bits: Indicates the structure of the community
+
+      Value Field:
+
+         The encoding of the Value Field is dependent on the "type" of
+         the community as specified by the Type Field.
+
+   Two extended communities are declared equal only when all 8 octets of
+   the community are equal.
+
+   The two members in the tuple <Type, Value> should be enumerated to
+   specify any community value.  The remaining octets of the community
+   are interpreted based on the value of the Type field.
+
+3.  Defined BGP Extended Community Types
+
+   This section introduces a few extended types and defines the format
+   of the Value Field for those types.  The types introduced here
+   provide "templates", where each template is identified by the high-
+   order octet of the extended community Type field, and the lower-order
+   octet (sub-type) is used to indicate a particular type of extended
+   community.
+
+
+
+
+
+
+Sangli, et al.              Standards Track                     [Page 3]
+
+RFC 4360           BGP Extended Communities Attribute      February 2006
+
+
+3.1.  Two-Octet AS Specific Extended Community
+
+   This is an extended type with Type Field composed of 2 octets and
+   Value Field composed of 6 octets.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   | 0x00 or 0x40  |   Sub-Type    |    Global Administrator       |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                     Local Administrator                       |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   The value of the high-order octet of this extended type is either
+   0x00 or 0x40.  The low-order octet of this extended type is used to
+   indicate sub-types.
+
+   The Value Field consists of two sub-fields:
+
+      Global Administrator sub-field: 2 octets
+
+         This sub-field contains an Autonomous System number assigned by
+         IANA.
+
+      Local Administrator sub-field: 4 octets
+
+         The organization identified by Autonomous System number in the
+         Global Administrator sub-field can encode any information in
+         this sub-field.  The format and meaning of the value encoded in
+         this sub-field should be defined by the sub-type of the
+         community.
+
+3.2.  IPv4 Address Specific Extended Community
+
+   This is an extended type with Type Field composed of 2 octets and
+   Value Field composed of 6 octets.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   | 0x01 or 0x41  |   Sub-Type    |    Global Administrator       |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   | Global Administrator (cont.)  |    Local Administrator        |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   The value of the high-order octet of this extended type is either
+   0x01 or 0x41.  The low-order octet of this extended type is used to
+   indicate sub-types.
+
+
+
+Sangli, et al.              Standards Track                     [Page 4]
+
+RFC 4360           BGP Extended Communities Attribute      February 2006
+
+
+   The Value field consists of two sub-fields:
+
+      Global Administrator sub-field: 4 octets
+
+         This sub-field contains an IPv4 unicast address assigned by one
+         of the Internet registries.
+
+      Local Administrator sub-field: 2 octets
+
+         The organization that has been assigned the IPv4 address in the
+         Global Administrator sub-field can encode any information in
+         this sub-field.  The format and meaning of this value encoded
+         in this sub-field should be defined by the sub-type of the
+         community.
+
+3.3.  Opaque Extended Community
+
+   This is an extended type with Type Field composed of 2 octets and
+   Value Field composed of 6 octets.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   | 0x03 or 0x43  |   Sub-Type    |                Value          |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                         Value (cont.)                         |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   The value of the high-order octet of this extended type is either
+   0x03 or 0x43.  The low-order octet of this extended type is used to
+   indicate sub-types.
+
+   This is a generic community of extended type.  The value of the sub-
+   type that should define the Value Field is to be assigned by IANA.
+
+4.  Route Target Community
+
+   The Route Target Community identifies one or more routers that may
+   receive a set of routes (that carry this Community) carried by BGP.
+   This is transitive across the Autonomous System boundary.
+
+   The Route Target Community is of an extended type.
+
+   The value of the high-order octet of the Type field for the Route
+   Target Community can be 0x00, 0x01, or 0x02.  The value of the low-
+   order octet of the Type field for this community is 0x02.
+
+
+
+
+
+Sangli, et al.              Standards Track                     [Page 5]
+
+RFC 4360           BGP Extended Communities Attribute      February 2006
+
+
+   When the value of the high-order octet of the Type field is 0x00 or
+   0x02, the Local Administrator sub-field contains a number from a
+   numbering space that is administered by the organization to which the
+   Autonomous System number carried in the Global Administrator sub-
+   field has been assigned by an appropriate authority.
+
+   When the value of the high-order octet of the Type field is 0x01, the
+   Local Administrator sub-field contains a number from a numbering
+   space that is administered by the organization to which the IP
+   address carried in the Global Administrator sub-field has been
+   assigned by an appropriate authority.
+
+   One possible use of the Route Target Community is specified in
+   [RFC4364].
+
+5.  Route Origin Community
+
+   The Route Origin Community identifies one or more routers that inject
+   a set of routes (that carry this Community) into BGP.  This is
+   transitive across the Autonomous System boundary.
+
+   The Route Origin Community is of an extended type.
+
+   The value of the high-order octet of the Type field for the Route
+   Origin Community can be 0x00, 0x01, or 0x02.  The value of the low-
+   order octet of the Type field for this community is 0x03.
+
+   When the value of the high-order octet of the Type field is 0x00 or
+   0x02, the Local Administrator sub-field contains a number from a
+   numbering space that is administered by the organization to which the
+   Autonomous System number carried in the Global Administrator sub-
+   field has been assigned by an appropriate authority.
+
+   When the value of the high-order octet of the Type field is 0x01, the
+   Local Administrator sub-field contains a number from a numbering
+   space that is administered by the organization to which the IP
+   address carried in the Global Administrator sub-field has been
+   assigned by an appropriate authority.
+
+   One possible use of the Route Origin Community is specified in
+   [RFC4364].
+
+
+
+
+
+
+
+
+
+
+Sangli, et al.              Standards Track                     [Page 6]
+
+RFC 4360           BGP Extended Communities Attribute      February 2006
+
+
+6.  Operations
+
+   A BGP speaker may use the Extended Communities attribute to control
+   which routing information it accepts or distributes to its peers.
+
+   The Extended Community attribute MUST NOT be used to modify the BGP
+   best path selection algorithm in a way that leads to forwarding
+   loops.
+
+   A BGP speaker receiving a route that doesn't have the Extended
+   Communities attribute MAY append this attribute to the route when
+   propagating it to its peers.
+
+   A BGP speaker receiving a route with the Extended Communities
+   attribute MAY modify this attribute according to the local policy.
+
+   By default if a range of routes is to be aggregated and the resultant
+   aggregate's path attributes do not carry the ATOMIC_AGGREGATE
+   attribute, then the resulting aggregate should have an Extended
+   Communities path attribute that contains the set union of all the
+   Extended Communities from all of the aggregated routes.  The default
+   behavior could be overridden via local configuration, in which case
+   handling the Extended Communities attribute in the presence of route
+   aggregation becomes a matter of the local policy of the BGP speaker
+   that performs the aggregation.
+
+   If a route has a non-transitive extended community, then before
+   advertising the route across the Autonomous System boundary the
+   community SHOULD be removed from the route.  However, the community
+   SHOULD NOT be removed when advertising the route across the BGP
+   Confederation boundary.
+
+   A route may carry both the BGP Communities attribute, as defined in
+   [RFC1997], and the Extended BGP Communities attribute.  In this
+   case, the BGP Communities attribute is handled as specified in
+   [RFC1997], and the Extended BGP Communities attribute is handled as
+   specified in this document.
+
+7.  IANA Considerations
+
+   All the BGP Extended Communities contain a Type field.  The IANA has
+   created a registry entitled, "BGP Extended Communities Type".  The
+   IANA will maintain this registry.
+
+   The Type could be either regular or extended.  For a regular Type the
+   IANA allocates an 8-bit value; for an extended Type the IANA
+   allocates a 16-bit value.
+
+
+
+
+Sangli, et al.              Standards Track                     [Page 7]
+
+RFC 4360           BGP Extended Communities Attribute      February 2006
+
+
+   The value allocated for a regular Type MUST NOT be reused as the
+   value of the high-order octet when allocating an extended Type.  The
+   value of the high-order octet allocated for an extended Type MUST NOT
+   be reused when allocating a regular Type.
+
+   The Type field indicates whether the Extended Community is transitive
+   or not.  Future requests for assignment of a Type value must specify
+   whether the Type value is intended for a transitive or a non-
+   transitive Extended Community.
+
+   Future assignments are to be made using either the Standards Action
+   process defined in [RFC2434], the Early IANA Allocation process
+   defined in [RFC4020], or the "First Come First Served" policy defined
+   in [RFC2434].
+
+   The following table summarizes the ranges for the assignment of
+   Types:
+
+      Type                        Standard Action         First Come
+                                  Early IANA Allocation   First Served
+      ------------------          ---------------------   ------------
+
+      regular, transitive          0x90-0xbf              0x00-0x3f
+
+      regular, non-transitive      0xd0-0xff              0x40-0x7f
+
+      extended, transitive         0x9000-0xbfff          0x0000-0x3fff
+
+      extended, non-transitive     0xd000-0xffff          0x4000-0x7fff
+
+   Assignments consist of a name and the value.
+
+   The Type values 0x80-0x8f and 0xc0-0xcf for regular Types, and
+   0x8000-0x8fff and 0xc000-0xcfff for extended Types are for
+   Experimental use as defined in RFC 3692.
+
+   This document defines a class of extended communities called two-
+   octet AS specific extended community for which the IANA is to create
+   and maintain a registry entitled "Two-octet AS Specific Extended
+   Community".  All the communities in this class are of extended Types.
+   Future assignments are to be made using the "First Come First Served"
+   policy defined in [RFC2434].  The Type values for the transitive
+   communities of the two-octet AS specific extended community class are
+   0x0000-0x00ff, and for the non-transitive communities of that class
+   are 0x4000-0x40ff.  Assignments consist of a name and the value.
+
+   This document makes the following assignments for the two-octet AS
+   specific extended community:
+
+
+
+Sangli, et al.              Standards Track                     [Page 8]
+
+RFC 4360           BGP Extended Communities Attribute      February 2006
+
+
+      Name                                     Type Value
+      ----                                     ----------
+      two-octet AS specific Route Target       0x0002
+      two-octet AS specific Route Origin       0x0003
+
+   This document defines a class of extended communities called IPv4
+   address specific extended community for which the IANA is to create
+   and maintain a registry entitled "IPv4 Address Specific Extended
+   Community".  All the communities in this class are of extended Types.
+   Future assignments are to be made using the "First Come First Served"
+   policy defined in [RFC2434].  The Type values for the transitive
+   communities of the IPv4 address specific extended community class
+   are 0x0100-0x01ff, and for the non-transitive communities of that
+   class are 0x4100-0x41ff.  Assignments consist of a name and the
+   value.
+
+   This document makes the following assignments for the IPv4 address
+   specific extended community:
+
+      Name                                     Type Value
+      ----                                     ----------
+      IPv4 address specific Route Target       0x0102
+      IPv4 address specific Route Origin       0x0103
+
+   This document defines a class of extended communities called opaque
+   extended community for which the IANA is to create and maintain a
+   registry entitled "Opaque Extended Community".  All the communities
+   in this class are of extended Types.  Future assignments are to be
+   made using the "First Come First Served" policy defined in [RFC2434].
+   The Type values for the transitive communities of the opaque extended
+   community class are 0x0300-0x03ff, and for the non-transitive
+   communities of that class are 0x4300-0x43ff.  Assignments consist of
+   a name and the value.
+
+   When requesting an allocation from more than one registry defined
+   above, one may ask for allocating the same Type value from these
+   registries.  If possible, the IANA should accommodate such requests.
+
+8.  Security Considerations
+
+   This extension to BGP has similar security implications as BGP
+   Communities [RFC1997].
+
+   This extension to BGP does not change the underlying security issues.
+   Specifically, an operator who is relying on the information carried
+   in BGP must have a transitive trust relationship back to the source
+   of the information.  Specifying the mechanism(s) to provide such a
+   relationship is beyond the scope of this document.
+
+
+
+Sangli, et al.              Standards Track                     [Page 9]
+
+RFC 4360           BGP Extended Communities Attribute      February 2006
+
+
+9.  Acknowledgements
+
+   The authors would like to thank John Hawkinson, Jeffrey Haas, Bruno
+   Rijsman, Bill Fenner, and Alex Zinin for their suggestions and
+   feedback.
+
+10.  Normative References
+
+   [BGP-4]        Rekhter, Y. and T. Li, "A Border Gateway Protocol 4
+                  (BGP-4)", RFC 4271, January 2006.
+
+   [RFC1997]      Chandra, R., Traina, P., and T. Li, "BGP Communities
+                  Attribute", RFC 1997, August 1996.
+
+   [RFC2119]      Bradner, S., "Key words for use in RFCs to Indicate
+                  Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+   [RFC2434]      Narten, T. and H. Alvestrand, "Guidelines for Writing
+                  an IANA Considerations Section in RFCs", BCP 26, RFC
+                  2434, October 1998.
+
+   [RFC4020]      Kompella, K. and A. Zinin, "Early IANA Allocation of
+                  Standards Track Code Points", BCP 100, RFC 4020,
+                  February 2005.
+
+11.  Informative References
+
+   [RFC4364]      Rosen, E. and Y. Rekhter, "BGP/MPLS IP Virtual Private
+                  Networks (VPNs)", RFC 4364, February 2006.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Sangli, et al.              Standards Track                    [Page 10]
+
+RFC 4360           BGP Extended Communities Attribute      February 2006
+
+
+Authors' Addresses
+
+   Srihari R. Sangli
+   Cisco Systems, Inc.
+
+   EMail: rsrihari@cisco.com
+
+
+   Dan Tappan
+   Cisco Systems, Inc.
+   250 Apollo Drive
+   Chelmsford, MA 01824
+
+   EMail: tappan@cisco.com
+
+
+   Yakov Rekhter
+   Juniper Networks, Inc.
+   1194 N. Mathilda Ave
+   Sunnyvale, CA 94089
+
+   EMail: yakov@juniper.net
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Sangli, et al.              Standards Track                    [Page 11]
+
+RFC 4360           BGP Extended Communities Attribute      February 2006
+
+
+Full Copyright Statement
+
+   Copyright (C) The Internet Society (2006).
+
+   This document is subject to the rights, licenses and restrictions
+   contained in BCP 78, and except as set forth therein, the authors
+   retain all their rights.
+
+   This document and the information contained herein are provided on an
+   "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS
+   OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET
+   ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED,
+   INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE
+   INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
+   WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Intellectual Property
+
+   The IETF takes no position regarding the validity or scope of any
+   Intellectual Property Rights or other rights that might be claimed to
+   pertain to the implementation or use of the technology described in
+   this document or the extent to which any license under such rights
+   might or might not be available; nor does it represent that it has
+   made any independent effort to identify any such rights.  Information
+   on the procedures with respect to rights in RFC documents can be
+   found in BCP 78 and BCP 79.
+
+   Copies of IPR disclosures made to the IETF Secretariat and any
+   assurances of licenses to be made available, or the result of an
+   attempt made to obtain a general license or permission for the use of
+   such proprietary rights by implementers or users of this
+   specification can be obtained from the IETF on-line IPR repository at
+   http://www.ietf.org/ipr.
+
+   The IETF invites any interested party to bring to its attention any
+   copyrights, patents or patent applications, or other proprietary
+   rights that may cover technology that may be required to implement
+   this standard.  Please address the information to the IETF at
+   ietf-ipr@ietf.org.
+
+Acknowledgement
+
+   Funding for the RFC Editor function is provided by the IETF
+   Administrative Support Activity (IASA).
+
+
+
+
+
+
+
+Sangli, et al.              Standards Track                    [Page 12]
+
diff --git a/doc/rfc4364.txt b/doc/rfc4364.txt
@@ -0,0 +1,2635 @@
+
+
+
+
+
+
+Network Working Group                                           E. Rosen
+Request for Comments: 4364                           Cisco Systems, Inc.
+Obsoletes: 2547                                               Y. Rekhter
+Category: Standards Track                         Juniper Networks, Inc.
+                                                           February 2006
+
+
+              BGP/MPLS IP Virtual Private Networks (VPNs)
+
+Status of This Memo
+
+   This document specifies an Internet standards track protocol for the
+   Internet community, and requests discussion and suggestions for
+   improvements.  Please refer to the current edition of the "Internet
+   Official Protocol Standards" (STD 1) for the standardization state
+   and status of this protocol.  Distribution of this memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (2006).
+
+Abstract
+
+   This document describes a method by which a Service Provider may use
+   an IP backbone to provide IP Virtual Private Networks (VPNs) for its
+   customers.  This method uses a "peer model", in which the customers'
+   edge routers (CE routers) send their routes to the Service Provider's
+   edge routers (PE routers); there is no "overlay" visible to the
+   customer's routing algorithm, and CE routers at different sites do
+   not peer with each other.  Data packets are tunneled through the
+   backbone, so that the core routers do not need to know the VPN
+   routes.
+
+   This document obsoletes RFC 2547.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Rosen & Rekhter             Standards Track                     [Page 1]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+Table of Contents
+
+   1. Introduction ....................................................3
+      1.1. Virtual Private Networks ...................................4
+      1.2. Customer Edge and Provider Edge ............................5
+      1.3. VPNs with Overlapping Address Spaces .......................6
+      1.4. VPNs with Different Routes to the Same System ..............7
+      1.5. SP Backbone Routers ........................................7
+      1.6. Security ...................................................8
+   2. Sites and CEs ...................................................8
+   3. VRFs: Multiple Forwarding Tables in PEs .........................9
+      3.1. VRFs and Attachment Circuits ...............................9
+      3.2. Associating IP Packets with VRFs ..........................10
+      3.3. Populating the VRFs .......................................11
+   4. VPN Route Distribution via BGP .................................12
+      4.1. The VPN-IPv4 Address Family ...............................13
+      4.2. Encoding of Route Distinguishers ..........................14
+      4.3. Controlling Route Distribution ............................15
+           4.3.1. The Route Target Attribute .........................15
+           4.3.2. Route Distribution Among PEs by BGP ................17
+           4.3.3. Use of Route Reflectors ............................20
+           4.3.4. How VPN-IPv4 NLRI Is Carried in BGP ................22
+           4.3.5. Building VPNs Using Route Targets ..................23
+           4.3.6. Route Distribution Among VRFs in a Single PE .......23
+   5. Forwarding .....................................................23
+   6. Maintaining Proper Isolation of VPNs ...........................26
+   7. How PEs Learn Routes from CEs ..................................27
+   8. How CEs Learn Routes from PEs ..................................30
+   9. Carriers' Carriers .............................................30
+   10. Multi-AS Backbones ............................................32
+   11. Accessing the Internet from a VPN .............................34
+   12. Management VPNs ...............................................36
+   13. Security Considerations .......................................37
+      13.1. Data Plane ...............................................37
+      13.2. Control Plane ............................................39
+      13.3. Security of P and PE Devices .............................39
+   14. Quality of Service ............................................39
+   15. Scalability ...................................................40
+   16. IANA Considerations ...........................................40
+   17. Acknowledgements ..............................................41
+   18. Contributors ..................................................41
+   19. Normative References ..........................................44
+   20. Informative References ........................................45
+
+
+
+
+
+
+
+
+Rosen & Rekhter             Standards Track                     [Page 2]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+1.  Introduction
+
+   This document describes a method by which a Service Provider may use
+   an IP backbone to provide IP Virtual Private Networks (VPNs) for its
+   customers.  This method uses a "peer model", in which the customers'
+   edge routers (CE routers) send their routes to the Service Provider's
+   edge routers (PE routers).  Border Gateway Protocol (BGP)
+   [BGP, BGP-MP] is then used by the Service Provider to exchange the
+   routes of a particular VPN among the PE routers that are attached to
+   that VPN.  This is done in a way that ensures that routes from
+   different VPNs remain distinct and separate, even if two VPNs have an
+   overlapping address space.  The PE routers distribute, to the CE
+   routers in a particular VPN, the routes from the other CE routers in
+   that VPN.  The CE routers do not peer with each other, hence there is
+   no "overlay" visible to the VPN's routing algorithm.  The term "IP"
+   in "IP VPN" is used to indicate that the PE receives IP datagrams
+   from the CE, examines their IP headers, and routes them accordingly.
+
+   Each route within a VPN is assigned a Multiprotocol Label Switching
+   (MPLS) [MPLS-ARCH, MPLS-BGP, MPLS-ENCAPS] label; when BGP distributes
+   a VPN route, it also distributes an MPLS label for that route.
+   Before a customer data packet travels across the Service Provider's
+   backbone, it is encapsulated with the MPLS label that corresponds, in
+   the customer's VPN, to the route that is the best match to the
+   packet's destination address.  This MPLS packet is further
+   encapsulated (e.g., with another MPLS label or with an IP or Generic
+   Routing Encapsulation (GRE) tunnel header [MPLS-in-IP-GRE]) so that
+   it gets tunneled across the backbone to the proper PE router.  Thus,
+   the backbone core routers do not need to know the VPN routes.
+
+   The primary goal of this method is to support the case in which a
+   client obtains IP backbone services from a Service Provider or
+   Service Providers with which it maintains contractual relationships.
+   The client may be an enterprise, a group of enterprises that need an
+   extranet, an Internet Service Provider, an application service
+   provider, another VPN Service Provider that uses this same method to
+   offer VPNs to clients of its own, etc.  The method makes it very
+   simple for the client to use the backbone services.  It is also very
+   scalable and flexible for the Service Provider, and allows the
+   Service Provider to add value.
+
+
+
+
+
+
+
+
+
+
+
+Rosen & Rekhter             Standards Track                     [Page 3]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+1.1.  Virtual Private Networks
+
+   Consider a set of "sites" that are attached to a common network that
+   we call "the backbone".  Now apply some policy to create a number of
+   subsets of that set, and impose the following rule: two sites may
+   have IP interconnectivity over that backbone only if at least one of
+   these subsets contains them both.
+
+   These subsets are Virtual Private Networks (VPNs).  Two sites have IP
+   connectivity over the common backbone only if there is some VPN that
+   contains them both.  Two sites that have no VPN in common have no
+   connectivity over that backbone.
+
+   If all the sites in a VPN are owned by the same enterprise, the VPN
+   may be thought of as a corporate "intranet".  If the various sites in
+   a VPN are owned by different enterprises, the VPN may be thought of
+   as an "extranet".  A site can be in more than one VPN; e.g., in an
+   intranet and in several extranets.  In general, when we use the term
+   "VPN" we will not be distinguishing between intranets and extranets.
+
+   We refer to the owners of the sites as the "customers".  We refer to
+   the owners/operators of the backbone as the "Service Providers"
+   (SPs).  The customers obtain "VPN service" from the SPs.
+
+   A customer may be a single enterprise, a set of enterprises, an
+   Internet Service Provider, an Application Service Provider, another
+   SP that offers the same kind of VPN service to its own customers,
+   etc.
+
+   The policies that determine whether a particular collection of sites
+   is a VPN are the policies of the customers.  Some customers will want
+   the implementation of these policies to be entirely the
+   responsibility of the SP.  Other customers may want to share with the
+   SP the responsibility for implementing these policies.  This document
+   specifies mechanisms that can be used to implement these policies.
+   The mechanisms we describe are general enough to allow these policies
+   to be implemented either by the SP alone or by a VPN customer
+   together with the SP.  Most of the discussion is focused on the
+   former case, however.
+
+   The mechanisms discussed in this document allow the implementation of
+   a wide range of policies.  For example, within a given VPN, one can
+   allow every site to have a direct route to every other site ("full
+   mesh").  Alternatively, one can force traffic between certain pairs
+   of sites to be routed via a third site.  This can be useful, e.g., if
+   it is desired that traffic between a pair of sites be passed through
+   a firewall, and the firewall is located at the third site.
+
+
+
+
+Rosen & Rekhter             Standards Track                     [Page 4]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   In this document, we restrict our discussion to the case in which the
+   customer is explicitly purchasing VPN service from an SP, or from a
+   set of SPs that have agreed to cooperate to provide the VPN service.
+   That is, the customer is not merely purchasing internet access from
+   an SP, and the VPN traffic does not pass through a random collection
+   of interconnected SP networks.
+
+   We also restrict our discussion to the case in which the backbone
+   provides an IP service to the customer, rather than, e.g., a layer 2
+   service such as Frame Relay, Asynchronous Transfer Mode (ATM),
+   ethernet, High Level Data Link Control (HDLC), or Point-to-Point
+   Protocol (PPP).  The customer may attach to the backbone via one of
+   these (or other) layer 2 services, but the layer 2 service is
+   terminated at the "edge" of the backbone, where the customer's IP
+   datagrams are removed from any layer 2 encapsulation.
+
+   In the rest of this introduction, we specify some properties that
+   VPNs should have.  The remainder of this document specifies a set of
+   mechanisms that can be deployed to provide a VPN model that has all
+   these properties.  This section also introduces some of the technical
+   terminology used in the remainder of the document.
+
+1.2.  Customer Edge and Provider Edge
+
+   Routers can be attached to each other, or to end systems, in a
+   variety of different ways: PPP connections, ATM Virtual Circuits
+   (VCs), Frame Relay VCs, ethernet interfaces, Virtual Local Area
+   Networks (VLANs) on ethernet interfaces, GRE tunnels, Layer 2
+   Tunneling Protocol (L2TP) tunnels, IPsec tunnels, etc.  We will use
+   the term "attachment circuit" to refer generally to some such means
+   of attaching to a router.  An attachment circuit may be the sort of
+   connection that is usually thought of as a "data link", or it may be
+   a tunnel of some sort; what matters is that it be possible for two
+   devices to be network layer peers over the attachment circuit.
+
+   Each VPN site must contain one or more Customer Edge (CE) devices.
+   Each CE device is attached, via some sort of attachment circuit, to
+   one or more Provider Edge (PE) routers.
+
+   Routers in the SP's network that do not attach to CE devices are
+   known as "P routers".
+
+   CE devices can be hosts or routers.  In a typical case, a site
+   contains one or more routers, some of which are attached to PE
+   routers.  The site routers that attach to the PE routers would then
+   be the CE devices, or "CE routers".  However, there is nothing to
+   prevent a non-routing host from attaching directly to a PE router, in
+   which case the host would be a CE device.
+
+
+
+Rosen & Rekhter             Standards Track                     [Page 5]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   Sometimes, what is physically attached to a PE router is a layer 2
+   switch.  In this case, we do NOT say that the layer 2 switch is a CE
+   device.  Rather, the CE devices are the hosts and routers that
+   communicate with the PE router through the layer 2 switch; the layer
+   2 infrastructure is transparent.  If the layer 2 infrastructure
+   provides a multipoint service, then multiple CE devices can be
+   attached to the PE router over the same attachment circuit.
+
+   CE devices are logically part of a customer's VPN.  PE and P routers
+   are logically part of the SP's network.
+
+   The attachment circuit over which a packet travels when going from CE
+   to PE is known as that packet's "ingress attachment circuit", and the
+   PE as the packet's "ingress PE".  The attachment circuit over which a
+   packet travels when going from PE to CE is known as that packet's
+   "egress attachment circuit", and the PE as the packet's "egress PE".
+
+   We will say that a PE router is attached to a particular VPN if it is
+   attached to a CE device that is in a site of that VPN.  Similarly, we
+   will say that a PE router is attached to a particular site if it is
+   attached to a CE device that is in that site.
+
+   When the CE device is a router, it is a routing peer of the PE(s) to
+   which it is attached, but it is NOT a routing peer of CE routers at
+   other sites.  Routers at different sites do not directly exchange
+   routing information with each other; in fact, they do not even need
+   to know of each other at all.  As a consequence, the customer has no
+   backbone or "virtual backbone" to manage, and does not have to deal
+   with any inter-site routing issues.  In other words, in the scheme
+   described in this document, a VPN is NOT an "overlay" on top of the
+   SP's network.
+
+   With respect to the management of the edge devices, clear
+   administrative boundaries are maintained between the SP and its
+   customers.  Customers are not required to access the PE or P routers
+   for management purposes, nor is the SP required to access the CE
+   devices for management purposes.
+
+1.3.  VPNs with Overlapping Address Spaces
+
+   If two VPNs have no sites in common, then they may have overlapping
+   address spaces.  That is, a given address might be used in VPN V1 as
+   the address of system S1, but in VPN V2 as the address of a
+   completely different system S2.  This is a common situation when the
+   VPNs each use an RFC 1918 private address space.  Of course, within
+   each VPN, each address must be unambiguous.
+
+
+
+
+
+Rosen & Rekhter             Standards Track                     [Page 6]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   Even two VPNs that do have sites in common may have overlapping
+   address spaces, as long as there is no need for any communication
+   between systems with such addresses and systems in the common sites.
+
+1.4.  VPNs with Different Routes to the Same System
+
+   Although a site may be in multiple VPNs, it is not necessarily the
+   case that the route to a given system at that site should be the same
+   in all the VPNs.  Suppose, for example, we have an intranet
+   consisting of sites A, B, and C, and an extranet consisting of A, B,
+   C, and the "foreign" site D.  Suppose that at site A there is a
+   server, and we want clients from B, C, or D to be able to use that
+   server.  Suppose also that at site B there is a firewall.  We want
+   all the traffic from site D to the server to pass through the
+   firewall, so that traffic from the extranet can be access controlled.
+   However, we don't want traffic from C to pass through the firewall on
+   the way to the server, since this is intranet traffic.
+
+   It is possible to set up two routes to the server.  One route, used
+   by sites B and C, takes the traffic directly to site A.  The second
+   route, used by site D, takes the traffic instead to the firewall at
+   site B.  If the firewall allows the traffic to pass, it then appears
+   to be traffic coming from site B, and follows the route to site A.
+
+1.5.  SP Backbone Routers
+
+   The SP's backbone consists of the PE routers, as well as other
+   routers ("P routers") that do not attach to CE devices.
+
+   If every router in an SP's backbone had to maintain routing
+   information for all the VPNs supported by the SP, there would be
+   severe scalability problems; the number of sites that could be
+   supported would be limited by the amount of routing information that
+   could be held in a single router.  It is important therefore that the
+   routing information about a particular VPN only needs to be present
+   in the PE routers that attach to that VPN.  In particular, the P
+   routers do not need to have ANY per-VPN routing information
+   whatsoever.  (This condition may need to be relaxed somewhat when
+   multicast routing is considered.  This is not considered further in
+   this paper, but is examined in [VPN-MCAST].)
+
+   So just as the VPN owners do not have a backbone or "virtual
+   backbone" to administer, the SPs themselves do not have a separate
+   backbone or "virtual backbone" to administer for each VPN.  Site-to-
+   site routing in the backbone is optimal (within the constraints of
+   the policies used to form the VPNs) and is not constrained in any way
+   by an artificial "virtual topology" of tunnels.
+
+
+
+
+Rosen & Rekhter             Standards Track                     [Page 7]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   Section 10 discusses some of the special issues that arise when the
+   backbone spans several Service Providers.
+
+1.6.  Security
+
+   VPNs of the sort being discussed here, even without making use of
+   cryptographic security measures, are intended to provide a level of
+   security equivalent to that obtainable when a layer 2 backbone (e.g.,
+   Frame Relay) is used.  That is, in the absence of misconfiguration or
+   deliberate interconnection of different VPNs, it is not possible for
+   systems in one VPN to gain access to systems in another VPN.  Of
+   course, the methods described herein do not by themselves encrypt the
+   data for privacy, nor do they provide a way to determine whether data
+   has been tampered with en route.  If this is desired, cryptographic
+   measures must be applied in addition. (See, e.g., [MPLS/BGP-IPsec].)
+   Security is discussed in more detail in Section 13.
+
+2.  Sites and CEs
+
+   From the perspective of a particular backbone network, a set of IP
+   systems may be regarded as a "site" if those systems have mutual IP
+   interconnectivity that doesn't require use of the backbone.  In
+   general, a site will consist of a set of systems that are in
+   geographic proximity.  However, this is not universally true.  If two
+   geographic locations are connected via a leased line, over which Open
+   Shortest Path First (OSPF) protocol [OSPFv2] is running, and if that
+   line is the preferred way of communicating between the two locations,
+   then the two locations can be regarded as a single site, even if each
+   location has its own CE router.  (This notion of "site" is
+   topological, rather than geographical.  If the leased line goes down,
+   or otherwise ceases to be the preferred route, but the two geographic
+   locations can continue to communicate by using the VPN backbone, then
+   one site has become two.)
+
+   A CE device is always regarded as being in a single site (though as
+   we shall see in Section 3.2, a site may consist of multiple "virtual
+   sites").  A site, however, may belong to multiple VPNs.
+
+   A PE router may attach to CE devices from any number of different
+   sites, whether those CE devices are in the same or in different VPNs.
+   A CE device may, for robustness, attach to multiple PE routers, of
+   the same or of different service providers.  If the CE device is a
+   router, the PE router and the CE router will appear as router
+   adjacencies to each other.
+
+   While we speak mostly of "sites" as being the basic unit of
+   interconnection, nothing here prevents a finer degree of granularity
+   in the control of interconnectivity.  For example, certain systems at
+
+
+
+Rosen & Rekhter             Standards Track                     [Page 8]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   a site may be members of an intranet as well as members of one or
+   more extranets, while other systems at the same site may be
+   restricted to being members of the intranet only.  However, this
+   might require that the site have two attachment circuits to the
+   backbone, one for the intranet and one for the extranet; it might
+   further require that firewall functionality be applied on the
+   extranet attachment circuit.
+
+3.  VRFs: Multiple Forwarding Tables in PEs
+
+   Each PE router maintains a number of separate forwarding tables.  One
+   of the forwarding tables is the "default forwarding table".  The
+   others are "VPN Routing and Forwarding tables", or "VRFs".
+
+3.1.  VRFs and Attachment Circuits
+
+   Every PE/CE attachment circuit is associated, by configuration, with
+   one or more VRFs.  An attachment circuit that is associated with a
+   VRF is known as a "VRF attachment circuit".
+
+   In the simplest and most typical case, a PE/CE attachment
+   circuit is associated with exactly one VRF.  When an IP packet is
+   received over a particular attachment circuit, its destination IP
+   address is looked up in the associated VRF.  The result of that
+   lookup determines how to route the packet.  The VRF used by a
+   packet's ingress PE for routing a particular packet is known as the
+   packet's "ingress VRF".  (There is also the notion of a packet's
+   "egress VRF", located at the packet's egress PE; this is discussed in
+   Section 5.)
+
+   If an IP packet arrives over an attachment circuit that is not
+   associated with any VRF, the packet's destination address is looked
+   up in the default forwarding table, and the packet is routed
+   accordingly.  Packets forwarded according to the default forwarding
+   table include packets from neighboring P or PE routers, as well as
+   packets from customer-facing attachment circuits that have not been
+   associated with VRFs.
+
+   Intuitively, one can think of the default forwarding table as
+   containing "public routes", and of the VRFs as containing "private
+   routes".  One can similarly think of VRF attachment circuits as being
+   "private", and of non-VRF attachment circuits as being "public".
+
+   If a particular VRF attachment circuit connects site S to a PE
+   router, then connectivity from S (via that attachment circuit) can be
+   restricted by controlling the set of routes that gets entered in the
+   corresponding VRF.  The set of routes in that VRF should be limited
+   to the set of routes leading to sites that have at least one VPN in
+
+
+
+Rosen & Rekhter             Standards Track                     [Page 9]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   common with S.  Then a packet sent from S over a VRF attachment
+   circuit can only be routed by the PE to another site S' if S' is in
+   one of the same VPNs as S.  That is, communication (via PE routers)
+   is prevented between any pair of VPN sites that have no VPN in
+   common.  Communication between VPN sites and non-VPN sites is
+   prevented by keeping the routes to the VPN sites out of the default
+   forwarding table.
+
+   If there are multiple attachment circuits leading from S to one or
+   more PE routers, then there might be multiple VRFs that could be used
+   to route traffic from S.  To properly restrict S's connectivity, the
+   same set of routes would have to exist in all the VRFs.
+   Alternatively, one could impose different connectivity restrictions
+   over different attachment circuits from S.  In that case, some of the
+   VRFs associated with attachment circuits from S would contain
+   different sets of routes than some of the others.
+
+   We allow the case in which a single attachment circuit is associated
+   with a set of VRFs, rather than with a single VRF.  This can be
+   useful if it is desired to divide a single VPN into several
+   "sub-VPNs", each with different connectivity restrictions, where some
+   characteristic of the customer packets is used to select from among
+   the sub-VPNs.  For simplicity though, we will usually speak of an
+   attachment circuit as being associated with a single VRF.
+
+3.2.  Associating IP Packets with VRFs
+
+   When a PE router receives a packet from a CE device, it must
+   determine the attachment circuit over which the packet arrived, as
+   this determines in turn the VRF (or set of VRFs) that can be used for
+   forwarding that packet.  In general, to determine the attachment
+   circuit over which a packet arrived, a PE router takes note of the
+   physical interface over which the packet arrived, and possibly also
+   takes note of some aspect of the packet's layer 2 header.  For
+   example, if a packet's ingress attachment circuit is a Frame Relay
+   VC, the identity of the attachment circuit can be determined from the
+   physical Frame Relay interface over which the packet arrived,
+   together with the Data Link Connection Identifier (DLCI) field in the
+   packet's Frame Relay header.
+
+   Although the PE's conclusion that a particular packet arrived on a
+   particular attachment circuit may be partially determined by the
+   packet's layer 2 header, it must be impossible for a customer, by
+   writing the header fields, to fool the SP into thinking that a packet
+   that was received over one attachment circuit really arrived over a
+   different one.  In the example above, although the attachment circuit
+   is determined partially by inspection of the DLCI field in the Frame
+   Relay header, this field cannot be set freely by the customer.
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 10]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   Rather, it must be set to a value specified by the SP, or else the
+   packet cannot arrive at the PE router.
+
+   In some cases, a particular site may be divided by the customer into
+   several "virtual sites".  The SP may designate a particular set of
+   VRFs to be used for routing packets from that site and may allow the
+   customer to set some characteristic of the packet, which is then used
+   for choosing a particular VRF from the set.
+
+   For example, each virtual site might be realized as a VLAN.  The SP
+   and the customer could agree that on packets arriving from a
+   particular CE, certain VLAN values would be used to identify certain
+   VRFs.  Of course, packets from that CE would be discarded by the PE
+   if they carry VLAN tag values that are not in the agreed-upon set.
+   Another way to accomplish this is to use IP source addresses.  In
+   this case, the PE uses the IP source address in a packet received
+   from the CE, along with the interface over which the packet is
+   received, to assign the packet to a particular VRF.  Again, the
+   customer would only be able to select from among the particular set
+   of VRFs that that customer is allowed to use.
+
+   If it is desired to have a particular host be in multiple virtual
+   sites, then that host must determine, for each packet, which virtual
+   site the packet is associated with.  It can do this, e.g., by sending
+   packets from different virtual sites on different VLANs, or out
+   different network interfaces.
+
+3.3.  Populating the VRFs
+
+   With what set of routes are the VRFs populated?
+
+   As an example, let PE1, PE2, and PE3 be three PE routers, and let
+   CE1, CE2, and CE3 be three CE routers.  Suppose that PE1 learns, from
+   CE1, the routes that are reachable at CE1's site.  If PE2 and PE3 are
+   attached, respectively, to CE2 and CE3, and there is some VPN V
+   containing CE1, CE2, and CE3, then PE1 uses BGP to distribute to PE2
+   and PE3 the routes that it has learned from CE1.  PE2 and PE3 use
+   these routes to populate the VRFs that they associate, respectively,
+   with the sites of CE2 and CE3.  Routes from sites that are not in VPN
+   V do not appear in these VRFs, which means that packets from CE2 or
+   CE3 cannot be sent to sites that are not in VPN V.
+
+   When we speak of a PE "learning" routes from a CE, we are not
+   presupposing any particular learning technique.  The PE may learn
+   routes by means of a dynamic routing algorithm, but it may also
+   "learn" routes by having those routes configured (i.e., static
+   routing).  (In this case, to say that the PE "learned" the routes
+   from the CE is perhaps to exercise a bit of poetic license.)
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 11]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   PEs also need to learn, from other PEs, the routes that belong to a
+   given VPN.  The procedures to be used for populating the VRFs with
+   the proper sets of routes are specified in Section 4.
+
+   If there are multiple attachment circuits leading from a particular
+   PE router to a particular site, they might all be mapped to the same
+   forwarding table.  But if policy dictates, they could be mapped to
+   different forwarding tables.  For instance, the policy might be that
+   a particular attachment circuit from a site is used only for intranet
+   traffic, while another attachment circuit from that site is used only
+   for extranet traffic.  (Perhaps, e.g., the CE attached to the
+   extranet attachment circuit is a firewall, while the CE attached to
+   the intranet attachment circuit is not.)  In this case, the two
+   attachment circuits would be associated with different VRFs.
+
+   Note that if two attachment circuits are associated with the same
+   VRF, then packets that the PE receives over one of them will be able
+   to reach exactly the same set of destinations as packets that the PE
+   receives over the other.  So two attachment circuits cannot be
+   associated with the same VRF unless each CE is in the exact same set
+   of VPNs as is the other.
+
+   If an attachment circuit leads to a site which is in multiple VPNs,
+   the attachment circuit may still be associated with a single VRF, in
+   which case the VRF will contain routes from the full set of VPNs of
+   which the site is a member.
+
+4.  VPN Route Distribution via BGP
+
+   PE routers use BGP to distribute VPN routes to each other (more
+   accurately, to cause VPN routes to be distributed to each other).
+
+   We allow each VPN to have its own address space, which means that a
+   given address may denote different systems in different VPNs.  If two
+   routes to the same IP address prefix are actually routes to different
+   systems, it is important to ensure that BGP not treat them as
+   comparable.  Otherwise, BGP might choose to install only one of them,
+   making the other system unreachable.  Further, we must ensure that
+   POLICY is used to determine which packets get sent on which routes;
+   given that several such routes are installed by BGP, only one such
+   must appear in any particular VRF.
+
+   We meet these goals by the use of a new address family, as specified
+   below.
+
+
+
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 12]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+4.1.  The VPN-IPv4 Address Family
+
+   The BGP Multiprotocol Extensions [BGP-MP] allow BGP to carry routes
+   from multiple "address families".  We introduce the notion of the
+   "VPN-IPv4 address family".  A VPN-IPv4 address is a 12-byte quantity,
+   beginning with an 8-byte Route Distinguisher (RD) and ending with a
+   4-byte IPv4 address.  If several VPNs use the same IPv4 address
+   prefix, the PEs translate these into unique VPN-IPv4 address
+   prefixes.  This ensures that if the same address is used in several
+   different VPNs, it is possible for BGP to carry several completely
+   different routes to that address, one for each VPN.
+
+   Since VPN-IPv4 addresses and IPv4 addresses are different address
+   families, BGP never treats them as comparable addresses.
+
+   An RD is simply a number, and it does not contain any inherent
+   information; it does not identify the origin of the route or the set
+   of VPNs to which the route is to be distributed.  The purpose of the
+   RD is solely to allow one to create distinct routes to a common IPv4
+   address prefix.  Other means are used to determine where to
+   redistribute the route (see Section 4.3).
+
+   The RD can also be used to create multiple different routes to the
+   very same system.  We have already discussed a situation in which the
+   route to a particular server should be different for intranet traffic
+   than for extranet traffic.  This can be achieved by creating two
+   different VPN-IPv4 routes that have the same IPv4 part, but different
+   RDs.  This allows BGP to install multiple different routes to the
+   same system, and allows policy to be used (see Section 4.3.5) to
+   decide which packets use which route.
+
+   The RDs are structured so that every Service Provider can administer
+   its own "numbering space" (i.e., can make its own assignments of
+   RDs), without conflicting with the RD assignments made by any other
+   Service Provider.  An RD consists of three fields: a 2-byte type
+   field, an administrator field, and an assigned number field.  The
+   value of the type field determines the lengths of the other two
+   fields, as well as the semantics of the administrator field.  The
+   administrator field identifies an assigned number authority, and the
+   assigned number field contains a number that has been assigned, by
+   the identified authority, for a particular purpose.  For example, one
+   could have an RD whose administrator field contains an Autonomous
+   System number (ASN), and whose (4-byte) number field contains a
+   number assigned by the SP to whom that ASN belongs (having been
+   assigned to that SP by the appropriate authority).
+
+   RDs are given this structure in order to ensure that an SP that
+   provides VPN backbone service can always create a unique RD when it
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 13]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   needs to do so.  However, the structure is not meaningful to BGP;
+   when BGP compares two such address prefixes, it ignores the structure
+   entirely.
+
+   A PE needs to be configured such that routes that lead to a
+   particular CE become associated with a particular RD.  The
+   configuration may cause all routes leading to the same CE to be
+   associated with the same RD, or it may cause different routes to be
+   associated with different RDs, even if they lead to the same CE.
+
+4.2.  Encoding of Route Distinguishers
+
+   As stated, a VPN-IPv4 address consists of an 8-byte Route
+   Distinguisher followed by a 4-byte IPv4 address.  The RDs are encoded
+   as follows:
+
+     - Type Field: 2 bytes
+     - Value Field: 6 bytes
+
+   The interpretation of the Value field depends on the value of the
+   type field.  At the present time, three values of the type field are
+   defined: 0, 1, and 2.
+
+     - Type 0: The Value field consists of two subfields:
+
+         * Administrator subfield: 2 bytes
+         * Assigned Number subfield: 4 bytes
+
+       The Administrator subfield must contain an Autonomous System
+       number.  If this ASN is from the public ASN space, it must have
+       been assigned by the appropriate authority (use of ASN values
+       from the private ASN space is strongly discouraged).  The
+       Assigned Number subfield contains a number from a numbering space
+       that is administered by the enterprise to which the ASN has been
+       assigned by an appropriate authority.
+
+     - Type 1: The Value field consists of two subfields:
+
+         * Administrator subfield: 4 bytes
+         * Assigned Number subfield: 2 bytes
+
+       The Administrator subfield must contain an IP address.  If this
+       IP address is from the public IP address space, it must have been
+       assigned by an appropriate authority (use of addresses from the
+       private IP address space is strongly discouraged).  The Assigned
+       Number subfield contains a number from a numbering space which is
+       administered by the enterprise to which the IP address has been
+       assigned.
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 14]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+     - Type 2: The Value field consists of two subfields:
+
+         * Administrator subfield: 4 bytes
+         * Assigned Number subfield: 2 bytes
+
+       The Administrator subfield must contain a 4-byte Autonomous
+       System number [BGP-AS4].  If this ASN is from the public ASN
+       space, it must have been assigned by the appropriate authority
+       (use of ASN values from the private ASN space is strongly
+       discouraged).  The Assigned Number subfield contains a number
+       from a numbering space which is administered by the enterprise to
+       which the ASN has been assigned by an appropriate authority.
+
+4.3.  Controlling Route Distribution
+
+   In this section, we discuss the way in which the distribution of the
+   VPN-IPv4 routes is controlled.
+
+   If a PE router is attached to a particular VPN (by being attached to
+   a particular CE in that VPN), it learns some of that VPN's IP routes
+   from the attached CE router.  Routes learned from a CE routing peer
+   over a particular attachment circuit may be installed in the VRF
+   associated with that attachment circuit.  Exactly which routes are
+   installed in this manner is determined by the way in which the PE
+   learns routes from the CE.  In particular, when the PE and CE are
+   routing protocol peers, this is determined by the decision process of
+   the routing protocol; this is discussed in Section 7.
+
+   These routes are then converted to VPN-IPv4 routes, and "exported" to
+   BGP.  If there is more than one route to a particular VPN-IPv4 address
+   prefix, BGP chooses the "best" one, using the BGP decision process.
+   That route is then distributed by BGP to the set of other PEs that
+   need to know about it.  At these other PEs, BGP will again choose the
+   best route for a particular VPN-IPv4 address prefix.  Then the chosen
+   VPN-IPv4 routes are converted back into IP routes, and "imported" into
+   one or more VRFs.  Whether they are actually installed in the VRFs
+   depends on the decision process of the routing method used between
+   the PE and those CEs that are associated with the VRF in question.
+   Finally, any route installed in a VRF may be distributed to the
+   associated CE routers.
+
+4.3.1.  The Route Target Attribute
+
+   Every VRF is associated with one or more Route Target (RT)
+   attributes.
+
+   When a VPN-IPv4 route is created (from an IPv4 route that the PE has
+   learned from a CE) by a PE router, it is associated with one or more
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 15]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   Route Target attributes.  These are carried in BGP as attributes of
+   the route.
+
+   Any route associated with Route Target T must be distributed to every
+   PE router that has a VRF associated with Route Target T.  When such a
+   route is received by a PE router, it is eligible to be installed in
+   those of the PE's VRFs that are associated with Route Target T.
+   (Whether it actually gets installed depends upon the outcome of the
+   BGP decision process, and upon the outcome of the decision process of
+   the IGP (i.e., the intra-domain routing protocol) running on the
+   PE/CE interface.)
+
+   A Route Target attribute can be thought of as identifying a set of
+   sites.  (Though it would be more precise to think of it as
+   identifying a set of VRFs.)  Associating a particular Route Target
+   attribute with a route allows that route to be placed in the VRFs
+   that are used for routing traffic that is received from the
+   corresponding sites.
+
+   There is a set of Route Targets that a PE router attaches to a route
+   received from site S; these may be called the "Export Targets".  And
+   there is a set of Route Targets that a PE router uses to determine
+   whether a route received from another PE router could be placed in
+   the VRF associated with site S; these may be called the "Import
+   Targets".  The two sets are distinct, and need not be the same.  Note
+   that a particular VPN-IPv4 route is only eligible for installation in
+   a particular VRF if there is some Route Target that is both one of
+   the route's Route Targets and one of the VRF's Import Targets.
+
+   The function performed by the Route Target attribute is similar to
+   that performed by the BGP Communities attribute.  However, the format
+   of the latter is inadequate for present purposes, since it allows
+   only a 2-byte numbering space.  It is desirable to structure the
+   format, similar to what we have described for RDs (see Section 4.2),
+   so that a type field defines the length of an administrator field,
+   and the remainder of the attribute is a number from the specified
+   administrator's numbering space.  This can be done using BGP Extended
+   Communities.  The Route Targets discussed herein are encoded as BGP
+   Extended Community Route Targets [BGP-EXTCOMM].  They are structured
+   similarly to the RDs.
+
+   When a BGP speaker has received more than one route to the same VPN-
+   IPv4 prefix, the BGP rules for route preference are used to choose
+   which VPN-IPv4 route is installed by BGP.
+
+   Note that a route can only have one RD, but it can have multiple
+   Route Targets.  In BGP, scalability is improved if one has a single
+   route with multiple attributes, as opposed to multiple routes.  One
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 16]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   could eliminate the Route Target attribute by creating more routes
+   (i.e., using more RDs), but the scaling properties would be less
+   favorable.
+
+   How does a PE determine which Route Target attributes to associate
+   with a given route?  There are a number of different possible ways.
+   The PE might be configured to associate all routes that lead to a
+   specified site with a specified Route Target.  Or the PE might be
+   configured to associate certain routes leading to a specified site
+   with one Route Target, and certain with another.
+
+   If the PE and the CE are themselves BGP peers (see Section 7), then
+   the SP may allow the customer, within limits, to specify how its
+   routes are to be distributed.  The SP and the customer would need to
+   agree in advance on the set of RTs that are allowed to be attached to
+   the customer's VPN routes.  The CE could then attach one or more of
+   those RTs to each IP route that it distributes to the PE.  This gives
+   the customer the freedom to specify in real time, within agreed-upon
+   limits, its route distribution policies.  If the CE is allowed to
+   attach RTs to its routes, the PE MUST filter out all routes that
+   contain RTs that the customer is not allowed to use.  If the CE is
+   not allowed to attach RTs to its routes, but does so anyway, the PE
+   MUST remove the RT before converting the customer's route to a VPN-
+   IPv4 route.
+
+4.3.2.  Route Distribution Among PEs by BGP
+
+   If two sites of a VPN attach to PEs that are in the same Autonomous
+   System, the PEs can distribute VPN-IPv4 routes to each other by means
+   of an IBGP connection between them.  (The term "IBGP" refers to the
+   set of protocols and procedures used when there is a BGP connection
+   between two BGP speakers in the same Autonomous System.  This is
+   distinguished from "EBGP", the set of procedures used between two BGP
+   speakers in different Autonomous Systems.)  Alternatively, each can
+   have an IBGP connection to a route reflector [BGP-RR].
+
+   When a PE router distributes a VPN-IPv4 route via BGP, it uses its
+   own address as the "BGP next hop".  This address is encoded as a
+   VPN-IPv4 address with an RD of 0.  ([BGP-MP] requires that the next
+   hop address be in the same address family as the Network Layer
+   Reachability Information (NLRI).)  It also assigns and distributes an
+   MPLS label.  (Essentially, PE routers distribute not VPN-IPv4 routes,
+   but Labeled VPN-IPv4 routes.  Cf. [MPLS-BGP].)  When the PE processes
+   a received packet that has this label at the top of the stack, the PE
+   will pop the stack, and process the packet appropriately.
+
+
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 17]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   The PE may distribute the exact set of routes that appears in the
+   VRF, or it may perform summarization and distribute aggregates of
+   those routes, or it may do some of one and some of the other.
+
+   Suppose that a PE has assigned label L to route R, and has
+   distributed this label mapping via BGP.  If R is an aggregate of a
+   set of routes in the VRF, the PE will know that packets from the
+   backbone that arrive with this label must have their destination
+   addresses looked up in a VRF.  When the PE looks up the label in its
+   Label Information Base, it learns which VRF must be used.  On the
+   other hand, if R is not an aggregate, then when the PE looks up the
+   label, it learns the egress attachment circuit, as well as the
+   encapsulation header for the packet.  In this case, no lookup in the
+   VRF is done.
+
+   We would expect that the most common case would be the case where the
+   route is NOT an aggregate.  The case where it is an aggregate can be
+   very useful though if the VRF contains a large number of host routes
+   (e.g., as in dial-in), or if the VRF has an associated Local Area
+   Network (LAN) interface (where there is a different outgoing layer 2
+   header for each system on the LAN, but a route is not distributed for
+   each such system).
+
+   Whether or not each route has a distinct label is an implementation
+   matter.  There are a number of possible algorithms one could use to
+   determine whether two routes get assigned the same label:
+
+     - One may choose to have a single label for an entire VRF, so that
+       a single label is shared by all the routes from that VRF.  Then
+       when the egress PE receives a packet with that label, it must
+       look up the packet's IP destination address in that VRF (the
+       packet's "egress VRF"), in order to determine the packet's egress
+       attachment circuit and the corresponding data link encapsulation.
+
+     - One may choose to have a single label for each attachment
+       circuit, so that a single label is shared by all the routes with
+       the same "outgoing attachment circuit".  This enables one to
+       avoid doing a lookup in the egress VRF, though some sort of
+       lookup may need to be done in order to determine the data link
+       encapsulation, e.g., an Address Resolution Protocol (ARP) lookup.
+
+     - One may choose to have a distinct label for each route.  Then if
+       a route is potentially reachable over more than one attachment
+       circuit, the PE/CE routing can switch the preferred path for a
+       route from one attachment circuit to another, without there being
+       any need to distribute a new label for that route.
+
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 18]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   There may be other possible algorithms as well.  The choice of
+   algorithm is entirely at the discretion of the egress PE, and is
+   otherwise transparent.
+
+   In using BGP-distributed MPLS labels in this manner, we presuppose
+   that an MPLS packet carrying such a label can be tunneled from the
+   router that installs the corresponding BGP-distributed route to the
+   router that is the BGP next hop of that route.  This requires either
+   that a label switched path exist between those two routers or else
+   that some other tunneling technology (e.g., [MPLS-in-IP-GRE]) can be
+   used between them.
+
+   This tunnel may follow a "best effort" route, or it may follow a
+   traffic-engineered route.  Between a given pair of routers, there may
+   be one such tunnel, or there may be several, perhaps with different
+   Quality of Service (QoS) characteristics.  All that matters for the
+   VPN architecture is that some such tunnel exists.  To ensure
+   interoperability among systems that implement this VPN architecture
+   using MPLS label switched paths as the tunneling technology, all such
+   systems MUST support Label Distribution Protocol (LDP) [MPLS-LDP].
+   In particular, Downstream Unsolicited mode MUST be supported on
+   interfaces that are neither Label Controlled ATM (LC-ATM) [MPLS-ATM]
+   nor Label Controlled Frame Relay (LC-FR) [MPLS-FR] interfaces, and
+   Downstream on Demand mode MUST be supported on LC-ATM interfaces and
+   LC-FR interfaces.
+
+   If the tunnel follows a best-effort route, then the PE finds the
+   route to the remote endpoint by looking up its IP address in the
+   default forwarding table.
+
+   A PE router, UNLESS it is a route reflector (see Section 4.3.3) or an
+   Autonomous System Border Router (ASBR) for an inter-provider VPN (see
+   Section 10), should not install a VPN-IPv4 route unless it has at
+   least one VRF with an Import Target identical to one of the route's
+   Route Target attributes.  Inbound filtering should be used to cause
+   such routes to be discarded.  If a new Import Target is later added
+   to one of the PE's VRFs (a "VPN Join" operation), it must then
+   acquire the routes it may previously have discarded.  This can be
+   done using the refresh mechanism described in [BGP-RFSH].  The
+   outbound route filtering mechanism of [BGP-ORF] can also be used to
+   advantage to make the filtering more dynamic.
+
+   Similarly, if a particular Import Target is no longer present in any
+   of a PE's VRFs (as a result of one or more "VPN Prune" operations),
+   the PE may discard all routes that, as a result, no longer have any
+   of the PE's VRF's Import Targets as one of their Route Target
+   attributes.
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 19]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   A router that is not attached to any VPN and that is not a Route
+   Reflector (i.e., a P router) never installs any VPN-IPv4 routes at
+   all.
+
+   Note that VPN Join and Prune operations are non-disruptive and do not
+   require any BGP connections to be brought down, as long as the
+   refresh mechanism of [BGP-RFSH] is used.
+
+   As a result of these distribution rules, no one PE ever needs to
+   maintain all routes for all VPNs; this is an important scalability
+   consideration.
+
+4.3.3.  Use of Route Reflectors
+
+   Rather than having a complete IBGP mesh among the PEs, it is
+   advantageous to make use of BGP Route Reflectors [BGP-RR] to improve
+   scalability.  All the usual techniques for using route reflectors to
+   improve scalability (e.g., route reflector hierarchies) are
+   available.
+
+   Route reflectors are the only systems that need to have routing
+   information for VPNs to which they are not directly attached.
+   However, there is no need to have any one route reflector know all
+   the VPN-IPv4 routes for all the VPNs supported by the backbone.
+
+   We outline below two different ways to partition the set of VPN-IPv4
+   routes among a set of route reflectors.
+
+      1. Each route reflector is preconfigured with a list of Route
+         Targets.  For redundancy, more than one route reflector may be
+         preconfigured with the same list.  A route reflector uses the
+         preconfigured list of Route Targets to construct its inbound
+         route filtering.  The route reflector may use the techniques of
+         [BGP-ORF] to install on each of its peers (regardless of
+         whether the peer is another route reflector or a PE) the set of
+         Outbound Route Filters (ORFs) that contains the list of its
+         preconfigured Route Targets.  Note that route reflectors should
+         accept ORFs from other route reflectors, which means that route
+         reflectors should advertise the ORF capability to other route
+         reflectors.
+
+         A service provider may modify the list of preconfigured Route
+         Targets on a route reflector.  When this is done, the route
+         reflector modifies the ORFs it installs on all of its IBGP
+         peers.  To reduce the frequency of configuration changes on
+         route reflectors, each route reflector may be preconfigured
+         with a block of Route Targets.  This way, when a new Route
+         Target is needed for a new VPN, there is already one or more
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 20]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+         route reflectors that are (pre)configured with this Route
+         Target.
+
+         Unless a given PE is a client of all route reflectors, when a
+         new VPN is added to the PE ("VPN Join"), it will need to become
+         a client of the route reflector(s) that maintain routes for
+         that VPN.  Likewise, deleting an existing VPN from the PE ("VPN
+         Prune") may result in a situation where the PE no longer needs
+         to be a client of some route reflector(s).  In either case, the
+         Join or Prune operation is non-disruptive (as long as
+         [BGP-RFSH] is used), and never requires a BGP connection to be
+         brought down, only to be brought right back up.
+
+         (By "adding a new VPN to a PE", we really mean adding a new
+         import Route Target to one of its VRFs, or adding a new VRF
+         with an import Route Target not had by any of the PE's other
+         VRFs.)
+
+      2. Another method is to have each PE be a client of some subset of
+         the route reflectors.  A route reflector is not preconfigured
+         with the list of Route Targets, and does not perform inbound
+         route filtering of routes received from its clients (PEs);
+         rather, it accepts all the routes received from all of its
+         clients (PEs).  The route reflector keeps track of the set of
+         the Route Targets carried by all the routes it receives.  When
+         the route reflector receives from its client a route with a
+         Route Target that is not in this set, this Route Target is
+         immediately added to the set.  On the other hand, when the
+         route reflector no longer has any routes with a particular
+         Route Target that is in the set, the route reflector should
+         delay (by a few hours) the deletion of this Route Target from
+         the set.
+
+         The route reflector uses this set to form the inbound route
+         filters that it applies to routes received from other route
+         reflectors.  The route reflector may also use ORFs to install
+         the appropriate outbound route filtering on other route
+         reflectors.  Just like with the first approach, a route
+         reflector should accept ORFs from other route reflectors.  To
+         accomplish this, a route reflector advertises ORF capability to
+         other route reflectors.
+
+         When the route reflector changes the set, it should immediately
+         change its inbound route filtering.  In addition, if the route
+         reflector uses ORFs, then the ORFs have to be immediately
+         changed to reflect the changes in the set.  If the route
+         reflector doesn't use ORFs, and a new Route Target is added to
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 21]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+         the set, the route reflector, after changing its inbound route
+         filtering, must issue BGP Refresh to other route reflectors.
+
+         The delay of "a few hours" mentioned above allows a route
+         reflector to hold onto routes with a given RT, even after it
+         loses the last of its clients that are interested in such
+         routes.  This protects against the need to reacquire all such
+         routes if the clients' "disappearance" is only temporary.
+
+         With this procedure, VPN Join and Prune operations are also
+         non-disruptive.
+
+         Note that this technique will not work properly if some client
+         PE has a VRF with an import Route Target that is not one of its
+         export Route Targets.
+
+   In these procedures, a PE router which attaches to a particular VPN
+   "auto-discovers" the other PEs that attach to the same VPN.  When a
+   new PE router is added, or when an existing PE router attaches to a
+   new VPN, no reconfiguration of other PE routers is needed.
+
+   Just as there is no one PE router that needs to know all the VPN-IPv4
+   routes supported over the backbone, these distribution rules ensure
+   that there is no one Route Reflector (RR) that needs to know all the
+   VPN-IPv4 routes supported over the backbone.  As a result, the total
+   number of such routes that can be supported over the backbone is not
+   bounded by the capacity of any single device, and therefore can
+   increase virtually without bound.
+
+4.3.4.  How VPN-IPv4 NLRI Is Carried in BGP
+
+   The BGP Multiprotocol Extensions [BGP-MP] are used to encode the
+   NLRI.  If the Address Family Identifier (AFI) field is set to 1, and
+   the Subsequent Address Family Identifier (SAFI) field is set to 128,
+   the NLRI is an MPLS-labeled VPN-IPv4 address.  AFI 1 is used since
+   the network layer protocol associated with the NLRI is still IP.
+   Note that this VPN architecture does not require the capability to
+   distribute unlabeled VPN-IPv4 addresses.
+
+   In order for two BGP speakers to exchange labeled VPN-IPv4 NLRI, they
+   must use BGP Capabilities Advertisement to ensure that they both are
+   capable of properly processing such NLRI.  This is done as specified
+   in [BGP-MP], by using capability code 1 (multiprotocol BGP), with an
+   AFI of 1 and an SAFI of 128.
+
+   The labeled VPN-IPv4 NLRI itself is encoded as specified in
+   [MPLS-BGP], where the prefix consists of an 8-byte RD followed by an
+   IPv4 prefix.
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 22]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+4.3.5.  Building VPNs Using Route Targets
+
+   By setting up the Import Targets and Export Targets properly, one can
+   construct different kinds of VPNs.
+
+   Suppose it is desired to create a fully meshed closed user group,
+   i.e., a set of sites where each can send traffic directly to the
+   other, but traffic cannot be sent to or received from other sites.
+   Then each site is associated with a VRF, a single Route Target
+   attribute is chosen, that Route Target is assigned to each VRF as
+   both the Import Target and the Export Target, and that Route Target
+   is not assigned to any other VRFs as either the Import Target or the
+   Export Target.
+
+   Alternatively, suppose one desired, for whatever reason, to create a
+   "hub and spoke" kind of VPN.  This could be done by the use of two
+   Route Target values, one meaning "Hub" and one meaning "Spoke".  At
+   the VRFs attached to the hub sites, "Hub" is the Export Target and
+
+   "Spoke" is the Import Target.  At the VRFs attached to the spoke
+   site, "Hub" is the Import Target and "Spoke" is the Export Target.
+
+   Thus, the methods for controlling the distribution of routing
+   information among various sets of sites are very flexible, which in
+   turn provides great flexibility in constructing VPNs.
+
+4.3.6.  Route Distribution Among VRFs in a Single PE
+
+   It is possible to distribute routes from one VRF to another, even if
+   both VRFs are in the same PE, even though in this case one cannot say
+   that the route has been distributed by BGP.  Nevertheless, the
+   decision to distribute a particular route from one VRF to another
+   within a single PE is the same decision that would be made if the
+   VRFs were on different PEs.  That is, it depends on the Route Target
+   attribute that is assigned to the route (or would be assigned if the
+   route were distributed by BGP), and the import target of the second
+   VRF.
+
+5.  Forwarding
+
+   If the intermediate routers in the backbone do not have any
+   information about the routes to the VPNs, how are packets forwarded
+   from one VPN site to another?
+
+   When a PE receives an IP packet from a CE device, it chooses a
+   particular VRF in which to look up the packet's destination address.
+   This choice is based on the packet's ingress attachment circuit.
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 23]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   Assume that a match is found.  As a result we learn the packet's
+   "next hop".
+
+   If the packet's next hop is reached directly over a VRF attachment
+   circuit from this PE (i.e., the packet's egress attachment circuit is
+   on the same PE as its ingress attachment circuit), then the packet is
+   sent on the egress attachment circuit, and no MPLS labels are pushed
+   onto the packet's label stack.
+
+   If the ingress and egress attachment circuits are on the same PE, but
+   are associated with different VRFs, and if the route that best
+   matches the destination address in the ingress attachment circuit's
+   VRF is an aggregate of several routes in the egress attachment
+   circuit's VRF, it may be necessary to look up the packet's
+   destination address in the egress VRF as well.
+
+   If the packet's next hop is NOT reached through a VRF attachment
+   circuit, then the packet must travel at least one hop through the
+   backbone.  The packet thus has a "BGP Next Hop", and the BGP Next Hop
+   will have assigned an MPLS label for the route that best matches the
+   packet's destination address.  Call this label the "VPN route label".
+   The IP packet is turned into an MPLS packet with the VPN route label
+   as the sole label on the label stack.
+
+   The packet must then be tunneled to the BGP Next Hop.
+
+   If the backbone supports MPLS, this is done as follows:
+
+     - The PE routers (and any Autonomous System border routers) that
+       redistribute VPN-IPv4 addresses need to insert /32 address
+       prefixes for themselves into the IGP routing tables of the
+       backbone.  This enables MPLS, at each node in the backbone
+       network, to assign a label corresponding to the route to each PE
+       router.  To ensure interoperability among different
+       implementations, it is required to support LDP for setting up the
+       label switched paths across the backbone.  However, other methods
+       of setting up these label switched paths are also possible.
+       (Some of these other methods may not require the presence of the
+       /32 address prefixes in the IGP.)
+
+     - If there are any traffic engineering tunnels to the BGP next hop,
+       and if one or more of those is available for use by the packet in
+       question, one of these tunnels is chosen.  This tunnel will be
+       associated with an MPLS label, the "tunnel label".  The tunnel
+       label gets pushed on the MPLS label stack, and the packet is
+       forwarded to the tunnel's next hop.
+
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 24]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+     - Otherwise,
+
+         * The packet will have an "IGP Next Hop", which is the next hop
+           along the IGP route to the BGP Next Hop.
+
+         * If the BGP Next Hop and the IGP Next Hop are the same, and if
+           penultimate hop popping is used, the packet is then sent to
+           the IGP Next Hop, carrying only the VPN route label.
+
+         * Otherwise, the IGP Next Hop will have assigned a label for
+           the route that best matches the address of the BGP Next Hop.
+           Call this the "tunnel label".  The tunnel label gets pushed
+           on as the packet's top label.  The packet is then forwarded
+           to the IGP Next Hop.
+
+     - MPLS will then carry the packet across the backbone to the BGP
+       Next Hop, where the VPN label will be examined.
+
+   If the backbone does not support MPLS, the MPLS packet carrying only
+   the VPN route label may be tunneled to the BGP Next Hop using the
+   techniques of [MPLS-in-IP-GRE].  When the packet emerges from the
+   tunnel, it will be at the BGP Next Hop, where the VPN route label
+   will be examined.
+
+   At the BGP Next Hop, the treatment of the packet depends on the VPN
+   route label (see Section 4.3.2).  In many cases, the PE will be able
+   to determine, from this label, the attachment circuit over which the
+   packet should be transmitted (to a CE device), as well as the proper
+   data link layer header for that interface.  In other cases, the PE
+   may only be able to determine that the packet's destination address
+   needs to be looked up in a particular VRF before being forwarded to a
+   CE device.  There are also intermediate cases in which the VPN route
+   label may determine the packet's egress attachment circuit, but a
+   lookup (e.g., ARP) still needs to be done in order to determine the
+   packet's data link header on that attachment circuit.
+
+   Information in the MPLS header itself, and/or information associated
+   with the label, may also be used to provide QoS on the interface to
+   the CE.
+
+   In any event, if the packet was an unlabeled IP packet when it
+   arrived at its ingress PE, it will again be an unlabeled packet when
+   it leaves its egress PE.
+
+   The fact that packets with VPN route labels are tunneled through the
+   backbone is what makes it possible to keep all the VPN routes out of
+   the P routers.  This is crucial to ensuring the scalability of the
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 25]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   scheme.  The backbone does not even need to have routes to the CEs,
+   only to the PEs.
+
+   With respect to the tunnels, it is worth noting that this
+   specification:
+
+     - DOES NOT require that the tunnels be point-to-point; multipoint-
+       to-point can be used;
+
+     - DOES NOT require that there be any explicit setup of the tunnels,
+       either via signaling or via manual configuration;
+
+     - DOES NOT require that there be any tunnel-specific signaling;
+
+     - DOES NOT require that there be any tunnel-specific state in the P
+       or PE routers, beyond what is necessary to maintain the routing
+       information and (if used) the MPLS label information.
+
+   Of course, this specification is compatible with the use of point-
+   to-point tunnels that must be explicitly configured and/or signaled,
+   and in some situations there may be reasons for using such tunnels.
+
+   The considerations that are relevant to choosing a particular
+   tunneling technology are outside the scope of this specification.
+
+6.  Maintaining Proper Isolation of VPNs
+
+   To maintain proper isolation of one VPN from another, it is important
+   that no router in the backbone accept a tunneled packet from outside
+   the backbone, unless it is sure that both endpoints of that tunnel
+   are outside the backbone.
+
+   If MPLS is being used as the tunneling technology, this means that a
+   router in the backbone MUST NOT accept a labeled packet from any
+   adjacent non-backbone device unless the following two conditions
+   hold:
+
+      1. the label at the top of the label stack was actually
+         distributed by that backbone router to that non-backbone
+         device, and
+
+      2. the backbone router can determine that use of that label will
+         cause the packet to leave the backbone before any labels lower
+         in the stack will be inspected, and before the IP header will
+         be inspected.
+
+   The first condition ensures that any labeled packets received from
+   non-backbone routers have a legitimate and properly assigned label at
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 26]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   the top of the label stack.  The second condition ensures that the
+   backbone routers will never look below that top label.  Of course,
+   the simplest way to meet these two conditions is just to have the
+   backbone devices refuse to accept labeled packets from non-backbone
+   devices.
+
+   If MPLS is not being used as the tunneling technology, then filtering
+   must be done to ensure that an MPLS-in-IP or MPLS-in-GRE packet can
+   be accepted into the backbone only if the packet's IP destination
+   address will cause it to be sent outside the backbone.
+
+7.  How PEs Learn Routes from CEs
+
+   The PE routers that attach to a particular VPN need to know, for each
+   attachment circuit leading to that VPN, which of the VPN's addresses
+   should be reached over that attachment circuit.
+
+   The PE translates these addresses into VPN-IPv4 addresses, using a
+   configured RD.  The PE then treats these VPN-IPv4 routes as input to
+   BGP.  Routes from a VPN site are NOT leaked into the backbone's IGP.
+
+   Exactly which PE/CE route distribution techniques are possible
+   depends on whether or not a particular CE is in a "transit VPN".  A
+   "transit VPN" is one that contains a router that receives routes from
+   a "third party" (i.e., from a router that is not in the VPN, but is
+   not a PE router) and that redistributes those routes to a PE router.
+   A VPN that is not a transit VPN is a "stub VPN".  The vast majority
+   of VPNs, including just about all corporate enterprise networks,
+   would be expected to be "stubs" in this sense.
+
+   The possible PE/CE distribution techniques are:
+
+      1. Static routing (i.e., configuration) may be used. (This is
+         likely to be useful only in stub VPNs.)
+
+      2. PE and CE routers may be Routing Information Protocol (RIP)
+         [RIP] peers, and the CE may use RIP to tell the PE router the
+         set of address prefixes that are reachable at the CE router's
+         site.  When RIP is configured in the CE, care must be taken to
+         ensure that address prefixes from other sites (i.e., address
+         prefixes learned by the CE router from the PE router) are never
+         advertised to the PE.  More precisely:  if a PE router, say,
+         PE1, receives a VPN-IPv4 route R1, and as a result distributes
+         an IPv4 route R2 to a CE, then R2 must not be distributed back
+         from that CE's site to a PE router, say, PE2, (where PE1 and
+         PE2 may be the same router or different routers), unless PE2
+         maps R2 to a VPN-IPv4 route that is different than (i.e.,
+         contains a different RD than) R1.
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 27]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+      3. The PE and CE routers may be OSPF peers.  A PE router that is
+         an OSPF peer of a CE router appears, to the CE router, to be an
+         area 0 router.  If a PE router is an OSPF peer of CE routers
+         that are in distinct VPNs, the PE must of course be running
+         multiple instances of OSPF.
+
+         IPv4 routes that the PE learns from the CE via OSPF are
+         redistributed into BGP as VPN-IPv4 routes.  Extended Community
+         attributes are used to carry, along with the route, all the
+         information needed to enable the route to be distributed to
+         other CE routers in the VPN in the proper type of OSPF Link
+         State Advertisement (LSA).  OSPF route tagging is used to
+         ensure that routes received from the MPLS/BGP backbone are not
+         sent back into the backbone.
+
+         Specification of the complete set of procedures for the use of
+         OSPF between PE and CE can be found in [VPN-OSPF] and
+         [OSPF-2547-DNBIT].
+
+      4. The PE and CE routers may be BGP peers, and the CE router may
+         use BGP (in particular, EBGP) to tell the PE router the set of
+         address prefixes that are at the CE router's site. (This
+         technique can be used in stub VPNs or transit VPNs.)
+
+         This technique has a number of advantages over the others:
+
+            a) Unlike the IGP alternatives, this does not require the PE
+               to run multiple routing algorithm instances in order to
+               talk to multiple CEs.
+
+            b) BGP is explicitly designed for just this function:
+               passing routing information between systems run by
+               different administrations.
+
+            c) If the site contains "BGP backdoors", i.e., routers with
+               BGP connections to routers other than PE routers, this
+               procedure will work correctly in all circumstances.  The
+               other procedures may or may not work, depending on the
+               precise circumstances.
+
+            d) Use of BGP makes it easy for the CE to pass attributes of
+               the routes to the PE.  A complete specification of the
+               set of attributes and their use is outside the scope of
+               this document.  However, some examples of the way this
+               may be used are the following:
+
+
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 28]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+                 - The CE may suggest a particular Route Target for each
+                   route, from among the Route Targets that the PE is
+                   authorized to attach to the route.  The PE would then
+                   attach only the suggested Route Target, rather than
+                   the full set.  This gives the CE administrator some
+                   dynamic control of the distribution of routes from
+                   the CE.
+
+                 - Additional types of Extended Community attributes may
+                   be defined, where the intention is to have those
+                   attributes passed transparently (i.e., without being
+                   changed by the PE routers) from CE to CE.  This would
+                   allow CE administrators to implement additional route
+                   filtering, beyond that which is done by the PEs.
+                   This additional filtering would not require
+                   coordination with the SP.
+
+         On the other hand, using BGP may be something new for the CE
+         administrators.
+
+         If a site is not in a transit VPN, note that it need not have a
+         unique Autonomous System Number (ASN).  Every CE whose site is
+         not in a transit VPN can use the same ASN.  This can be chosen
+         from the private ASN space, and it will be stripped out by the
+         PE.  Routing loops are prevented by use of the Site of Origin
+         attribute (see below).
+
+         What if a set of sites constitutes a transit VPN?  This will
+         generally be the case only if the VPN is itself an Internet
+         Service Provider's (ISP's) network, where the ISP is itself
+         buying backbone services from another SP.  The latter SP may be
+         called a "carrier's carrier".  In this case, the best way to
+         provide the VPN is to have the CE routers support MPLS, and to
+         use the technique described in Section 9.
+
+   When we do not need to distinguish among the different ways in which
+   a PE can be informed of the address prefixes that exist at a given
+   site, we will simply say that the PE has "learned" the routes from
+   that site.  This includes the case where the PE has been manually
+   configured with the routes.
+
+   Before a PE can redistribute a VPN-IPv4 route learned from a site, it
+   must assign a Route Target attribute (see Section 4.3.1) to the
+   route, and it may assign a Site of Origin attribute to the route.
+
+   The Site of Origin attribute, if used, is encoded as a Route Origin
+   Extended Community [BGP-EXTCOMM].  The purpose of this attribute is
+   to uniquely identify the set of routes learned from a particular
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 29]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   site.  This attribute is needed in some cases to ensure that a route
+   learned from a particular site via a particular PE/CE connection is
+   not distributed back to the site through a different PE/CE
+   connection.  It is particularly useful if BGP is being used as the
+   PE/CE protocol, but different sites have not been assigned distinct
+   ASNs.
+
+8.  How CEs Learn Routes from PEs
+
+   In this section, we assume that the CE device is a router.
+
+   If the PE places a particular route in the VRF it uses to route
+   packets received from a particular CE, then in general, the PE may
+   distribute that route to the CE.  Of course, the PE may distribute
+   that route to the CE only if this is permitted by the rules of the
+   PE/CE protocol.  (For example, if a particular PE/CE protocol has
+   "split horizon", certain routes in the VRF cannot be redistributed
+   back to the CE.)  We add one more restriction on the distribution of
+   routes from PE to CE: if a route's Site of Origin attribute
+   identifies a particular site, that route must never be redistributed
+   to any CE at that site.
+
+   In most cases, however, it will be sufficient for the PE to simply
+   distribute the default route to the CE.  (In some cases, it may even
+   be sufficient for the CE to be configured with a default route
+   pointing to the PE.)  This will generally work at any site that does
+   not itself need to distribute the default route to other sites.
+   (E.g., if one site in a corporate VPN has the corporation's access to
+   the Internet, that site might need to have default distributed to the
+   other sites, but one could not distribute default to that site
+   itself.)
+
+   Whatever procedure is used to distribute routes from CE to PE will
+   also be used to distribute routes from PE to CE.
+
+9.  Carriers' Carriers
+
+   Sometimes a VPN may actually be the network of an ISP, with its own
+   peering and routing policies.  Sometimes a VPN may be the network of
+   an SP that is offering VPN services in turn to its own customers.
+   VPNs like these can also obtain backbone service from another SP, the
+   "carrier's carrier", using essentially the same methods described in
+   this document.  However, it is necessary in these cases that the CE
+   routers support MPLS.  In particular:
+
+     - The CE routers should distribute to the PE routers ONLY those
+       routes that are internal to the VPN.  This allows the VPN to be
+       handled as a stub VPN.
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 30]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+     - The CE routers should support MPLS, in that they should be able
+       to receive labels from the PE routers, and send labeled packets
+       to the PE routers.  They do not need to distribute labels of
+       their own, though.
+
+     - The PE routers should distribute, to the CE routers, labels for
+       the routes they distribute to the CE routers.
+
+       The PE must not distribute the same label to two different CEs
+       unless one of the following conditions holds:
+
+         * The two CEs are associated with exactly the same set of VRFs;
+
+         * The PE maintains a different Incoming Label Map ([MPLS-ARCH])
+           for each CE.
+
+       Further, when the PE receives a labeled packet from a CE, it must
+       verify that the top label is one that was distributed to that CE.
+
+     - Routers at the different sites should establish BGP connections
+       among themselves for the purpose of exchanging external routes
+       (i.e., routes that lead outside of the VPN).
+
+     - All the external routes must be known to the CE routers.
+
+   Then when a CE router looks up a packet's destination address, the
+   routing lookup will resolve to an internal address, usually the
+   address of the packet's BGP next hop.  The CE labels the packet
+   appropriately and sends the packet to the PE.  The PE, rather than
+   looking up the packet's IP destination address in a VRF, uses the
+   packet's top MPLS label to select the BGP next hop.  As a result, if
+   the BGP next hop is more than one hop away, the top label will be
+   replaced by two labels, a tunnel label and a VPN route label.  If the
+   BGP next hop is one hop away, the top label may be replaced by just
+   the VPN route label.  If the ingress PE is also the egress PE, the
+   top label will just be popped.  When the packet is sent from its
+   egress PE to a CE, the packet will have one fewer MPLS labels than it
+   had when it was first received by its ingress PE.
+
+   In the above procedure, the CE routers are the only routers in the
+   VPN that need to support MPLS.  If, on the other hand, all the
+   routers at a particular VPN site support MPLS, then it is no longer
+   required that the CE routers know all the external routes.  All that
+   is required is that the external routes be known to whatever routers
+   are responsible for putting the label stack on a hitherto unlabeled
+   packet and that there be a label switched path that leads from those
+   routers to their BGP peers at other sites.  In this case, for each
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 31]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   internal route that a CE router distributes to a PE router, it must
+   also distribute a label.
+
+10.  Multi-AS Backbones
+
+   What if two sites of a VPN are connected to different Autonomous
+   Systems (e.g., because the sites are connected to different SPs)?
+   The PE routers attached to that VPN will then not be able to maintain
+   IBGP connections with each other, or with a common route reflector.
+   Rather, there needs to be some way to use EBGP to distribute VPN-IPv4
+   addresses.
+
+   There are a number of different ways of handling this case, which we
+   present in order of increasing scalability.
+
+      a) VRF-to-VRF connections at the AS (Autonomous System) border
+         routers.
+
+         In this procedure, a PE router in one AS attaches directly to a
+         PE router in another.  The two PE routers will be attached by
+         multiple sub-interfaces, at least one for each of the VPNs
+         whose routes need to be passed from AS to AS.  Each PE will
+         treat the other as if it were a CE router.  That is, the PEs
+         associate each such sub-interface with a VRF, and use EBGP to
+         distribute unlabeled IPv4 addresses to each other.
+
+         This is a procedure that "just works", and that does not
+         require MPLS at the border between ASes.  However, it does not
+         scale as well as the other procedures discussed below.
+
+      b) EBGP redistribution of labeled VPN-IPv4 routes from AS to
+         neighboring AS.
+
+         In this procedure, the PE routers use IBGP to redistribute
+         labeled VPN-IPv4 routes either to an Autonomous System Border
+         Router (ASBR), or to a route reflector of which an ASBR is a
+         client.  The ASBR then uses EBGP to redistribute those labeled
+         VPN-IPv4 routes to an ASBR in another AS, which in turn
+         distributes them to the PE routers in that AS, or perhaps to
+         another ASBR which in turn distributes them, and so on.
+
+         When using this procedure, VPN-IPv4 routes should only be
+         accepted on EBGP connections at private peering points, as part
+         of a trusted arrangement between SPs.  VPN-IPv4 routes should
+         neither be distributed to nor accepted from the public
+         Internet, or from any BGP peers that are not trusted.  An ASBR
+         should never accept a labeled packet from an EBGP peer unless
+         it has actually distributed the top label to that peer.
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 32]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+         If there are many VPNs having sites attached to different
+         Autonomous Systems, there does not need to be a single ASBR
+         between those two ASes that holds all the routes for all the
+         VPNs; there can be multiple ASBRs, each of which holds only the
+         routes for a particular subset of the VPNs.
+
+         This procedure requires that there be a label switched path
+         leading from a packet's ingress PE to its egress PE.  Hence the
+         appropriate trust relationships must exist between and among
+         the set of ASes along the path.  Also, there must be agreement
+         among the set of SPs as to which border routers need to receive
+         routes with which Route Targets.
+
+      c) Multi-hop EBGP redistribution of labeled VPN-IPv4 routes
+         between source and destination ASes, with EBGP redistribution
+         of labeled IPv4 routes from AS to neighboring AS.
+
+         In this procedure, VPN-IPv4 routes are neither maintained nor
+         distributed by the ASBRs.  An ASBR must maintain labeled IPv4
+         /32 routes to the PE routers within its AS.  It uses EBGP to
+         distribute these routes to other ASes.  ASBRs in any transit
+         ASes will also have to use EBGP to pass along the labeled /32
+         routes.  This results in the creation of a label switched path
+         from the ingress PE router to the egress PE router.  Now PE
+         routers in different ASes can establish multi-hop EBGP
+         connections to each other, and can exchange VPN-IPv4 routes
+         over those connections.
+
+         If the /32 routes for the PE routers are made known to the P
+         routers of each AS, everything works normally.  If the /32
+         routes for the PE routers are NOT made known to the P routers
+         (other than the ASBRs), then this procedure requires a packet's
+         ingress PE to put a three-label stack on it.  The bottom label
+         is assigned by the egress PE, corresponding to the packet's
+         destination address in a particular VRF.  The middle label is
+         assigned by the ASBR, corresponding to the /32 route to the
+         egress PE.  The top label is assigned by the ingress PE's IGP
+         Next Hop, corresponding to the /32 route to the ASBR.
+
+         To improve scalability, one can have the multi-hop EBGP
+         connections exist only between a route reflector in one AS and
+         a route reflector in another.  (However, when the route
+         reflectors distribute routes over this connection, they do not
+         modify the BGP next hop attribute of the routes.)  The actual
+         PE routers would then only have IBGP connections to the route
+         reflectors in their own AS.
+
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 33]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+         This procedure is very similar to the "carrier's carrier"
+         procedures described in Section 9.  Like the previous
+         procedure, it requires that there be a label switched path
+         leading from a packet's ingress PE to its egress PE.
+
+11.  Accessing the Internet from a VPN
+
+   Many VPN sites will need to be able to access the public Internet, as
+   well as to access other VPN sites.  The following describes some of
+   the alternative ways of doing this.
+
+      1. In some VPNs, one or more of the sites will obtain Internet
+         access by means of an "Internet gateway" (perhaps a firewall)
+         attached to a non-VRF interface to an ISP.  The ISP may or may
+         not be the same organization as the SP that is providing the
+         VPN service.  Traffic to/from the Internet gateway would then
+         be routed according to the PE router's default forwarding
+         table.
+
+         In this case, the sites that have Internet access may be
+         distributing a default route to their PEs, which in turn
+         redistribute it to other PEs and hence into other sites of the
+         VPN.  This provides Internet access for all of the VPN's sites.
+
+         In order to properly handle traffic from the Internet, the ISP
+         must distribute, to the Internet, routes leading to addresses
+         that are within the VPN.  This is completely independent of any
+         of the route distribution procedures described in this
+         document.  The internal structure of the VPN will in general
+         not be visible from the Internet; such routes would simply lead
+         to the non-VRF interface that attaches to the VPN's Internet
+         gateway.
+
+         In this model, there is no exchange of routes between a PE
+         router's default forwarding table and any of its VRFs.  VPN
+         route distribution procedures and Internet route distribution
+         procedures are completely independent.
+
+         Note that although some sites of the VPN use a VRF interface to
+         communicate with the Internet, ultimately all packets to/from
+         the Internet traverse a non-VRF interface before
+         leaving/entering the VPN, so we refer to this as "non-VRF
+         Internet access".
+
+         Note that the PE router to which the non-VRF interface attaches
+         does not necessarily need to maintain all the Internet routes
+         in its default forwarding table.  The default forwarding table
+         could have as few as one route, "default", which leads to
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 34]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+         another router (probably an adjacent one) that has the Internet
+         routes.  A variation of this scheme is to tunnel packets
+         received over the non-VRF interface from the PE router to
+         another router, where this other router maintains the full set
+         of Internet routes.
+
+      2. Some VPNs may obtain Internet access via a VRF interface ("VRF
+         Internet access").  If a packet is received by a PE over a VRF
+         interface, and if the packet's destination address does not
+         match any route in the VRF, then it may be matched against the
+         PE's default forwarding table.  If a match is made there, the
+         packet can be forwarded natively through the backbone to the
+         Internet, instead of being forwarded by MPLS.
+
+         In order for traffic to flow natively in the opposite direction
+         (from Internet to VRF interface), some of the routes from the
+         VRF must be exported to the Internet forwarding table.
+         Needless to say, any such routes must correspond to globally
+         unique addresses.
+
+         In this scheme, the default forwarding table might have the
+         full set of Internet routes, or it might have as little as a
+         single default route leading to another router that does have
+         the full set of Internet routes in its default forwarding
+         table.
+
+      3. Suppose the PE has the capability to store "non-VPN routes" in
+         a VRF.  If a packet's destination address matches a "non-VPN
+         route", then the packet is transmitted natively, rather than
+         being transmitted via MPLS.  If the VRF contains a non-VPN
+         default route, all packets for the public Internet will match
+         it, and be forwarded natively to the default route's next hop.
+         At that next hop, the packets' destination addresses will be
+         looked up in the default forwarding table, and may match more
+         specific routes.
+
+         This technique would only be available if none of the CE
+         routers is distributing a default route.
+
+      4. It is also possible to obtain Internet access via a VRF
+         interface by having the VRF contain the Internet routes.
+         Compared with model 2, this eliminates the second lookup, but
+         it has the disadvantage of requiring the Internet routes to be
+         replicated in each such VRF.
+
+         If this technique is used, the SP may want to make its
+         interface to the Internet be a VRF interface, and to use the
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 35]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+         techniques of Section 4 to distribute Internet routes, as VPN-
+         IPv4 routes, to other VRFs.
+
+   It should be clearly understood that by default, there is no exchange
+   of routes between a VRF and the default forwarding table.  This is
+   done ONLY upon agreement between a customer and an SP, and only if it
+   suits the customer's policies.
+
+12.  Management VPNs
+
+   This specification does not require that the sub-interface connecting
+   a PE router and a CE router be a "numbered" interface.  If it is a
+   numbered interface, this specification allows the addresses assigned
+   to the interface to come from either the address space of the VPN or
+   the address space of the SP.
+
+   If a CE router is being managed by the Service Provider, then the
+   Service Provider will likely have a network management system that
+   needs to be able to communicate with the CE router.  In this case,
+   the addresses assigned to the sub-interface connecting the CE and PE
+   routers should come from the SP's address space, and should be unique
+   within that space.  The network management system should itself
+   connect to a PE router (more precisely, be at a site that connects to
+   a PE router) via a VRF interface.  The address of the network
+   management system will be exported to all VRFs that are associated
+   with interfaces to CE routers that are managed by the SP.  The
+   addresses of the CE routers will be exported to the VRF associated
+   with the network management system, but not to any other VRFs.
+
+   This allows communication between the CE and network management
+   system, but does not allow any undesired communication to or among
+   the CE routers.
+
+   One way to ensure that the proper route import/exports are done is to
+   use two Route Targets; call them T1 and T2.  If a particular VRF
+   interface attaches to a CE router that is managed by the SP, then
+   that VRF is configured to:
+
+     - import routes that have T1 attached to them, and
+
+     - attach T2 to addresses assigned to each end of its VRF
+       interfaces.
+
+   If a particular VRF interface attaches to the SP's network management
+   system, then that VRF is configured to attach T1 to the address of
+   that system, and to import routes that have T2 attached to them.
+
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 36]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+13.  Security Considerations
+
+13.1.  Data Plane
+
+   By security in the "data plane", we mean protection against the
+   following possibilities:
+
+     - Packets from within a VPN travel to a site outside the VPN, other
+       than in a manner consistent with the policies of the VPN.
+
+     - Packets from outside a VPN enter one of the VPN's sites, other
+       than in a manner consistent with the policies of the VPN.
+
+   Under the following conditions:
+
+      1. a backbone router does not accept labeled packets over a
+         particular data link, unless it is known that that data link
+         attaches only to trusted systems, or unless it is known that
+         such packets will leave the backbone before the IP header or
+         any labels lower in the stack will be inspected,
+
+      2. labeled VPN-IPv4 routes are not accepted from untrusted or
+         unreliable routing peers, and
+
+      3. no successful attacks have been mounted on the control plane,
+
+   the data plane security provided by this architecture is virtually
+   identical to that provided to VPNs by Frame Relay or ATM backbones.
+   If the devices under the control of the SP are properly configured,
+   data will not enter or leave a VPN unless authorized to do so.
+
+   Condition 1 above can be stated more precisely.  One should discard a
+   labeled packet received from a particular neighbor unless one of the
+   following two conditions holds:
+
+     - the packet's top label has a label value that the receiving
+       system has distributed to that neighbor, or
+
+     - the packet's top label has a label value that the receiving
+       system has distributed to a system beyond that neighbor (i.e.,
+       when it is known that the path from the system to which the label
+       was distributed to the receiving system may be via that
+       neighbor).
+
+
+
+
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 37]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   Condition 2 above is of most interest in the case of inter-provider
+   VPNs (see Section 10).  For inter-provider VPNs constructed according
+   to scheme (b) of Section 10, condition 2 is easily checked.  (The
+   issue of security when scheme (c) of Section 10 is used is for
+   further study.)
+
+   It is worth noting that the use of MPLS makes it much simpler to
+   provide data plane security than might be possible if one attempted
+   to use some form of IP tunneling in place of the MPLS outer label.
+   It is a simple matter to have one's border routers refuse to accept a
+   labeled packet unless the first of the above conditions applies to
+   it.  It is rather more difficult to configure a router to refuse to
+   accept an IP packet if that packet is an IP tunneled packet whose
+   destination address is that of a PE router; certainly, this is not
+   impossible to do, but it has both management and performance
+   implications.
+
+   MPLS-in-IP and MPLS-in-GRE tunneling are specified in
+   [MPLS-in-IP-GRE].  If it is desired to use such tunnels to carry VPN
+   packets, then the security considerations described in Section 8 of
+   that document must be fully understood.  Any implementation of
+   BGP/MPLS IP VPNs that allows VPN packets to be tunneled as described
+   in that document MUST contain an implementation of IPsec that can be
+   used as therein described.  If the tunnel is not secured by IPsec,
+   then the technique of IP address filtering at the border routers,
+   described in Section 8.2 of that document, is the only means of
+   ensuring that a packet that exits the tunnel at a particular egress
+   PE was actually placed in the tunnel by the proper tunnel head node
+   (i.e., that the packet does not have a spoofed source address).
+   Since border routers frequently filter only source addresses, packet
+   filtering may not be effective unless the egress PE can check the IP
+   source address of any tunneled packet it receives, and compare it to
+   a list of IP addresses that are valid tunnel head addresses.  Any
+   implementation that allows MPLS-in-IP and/or MPLS-in-GRE tunneling to
+   be used without IPsec MUST allow the egress PE to validate in this
+   manner the IP source address of any tunneled packet that it receives.
+
+   In the case where a number of CE routers attach to a PE router via a
+   LAN interface, to ensure proper security, one of the following
+   conditions must hold:
+
+      1. All the CE routers on the LAN belong to the same VPN, or
+
+      2. A trusted and secured LAN switch divides the LAN into multiple
+         VLANs, with each VLAN containing only systems of a single VPN;
+         in this case, the switch will attach the appropriate VLAN tag
+         to any packet before forwarding it to the PE router.
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 38]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   Cryptographic privacy is not provided by this architecture, nor by
+   Frame Relay or ATM VPNs.  These architectures are all compatible with
+   the use of cryptography on a CE-CE basis, if that is desired.
+
+   The use of cryptography on a PE-PE basis is for further study.
+
+13.2.  Control Plane
+
+   The data plane security of the previous section depends on the
+   security of the control plane.  To ensure security, neither BGP nor
+   LDP connections should be made with untrusted peers.  The TCP/IP MD5
+   authentication option [TCP-MD5] should be used with both these
+   protocols.  The routing protocol within the SP's network should also
+   be secured in a similar manner.
+
+13.3.  Security of P and PE Devices
+
+   If the physical security of these devices is compromised, data plane
+   security may also be compromised.
+
+   The usual steps should be taken to ensure that IP traffic from the
+   public Internet cannot be used to modify the configuration of these
+   devices, or to mount Denial of Service attacks on them.
+
+14.  Quality of Service
+
+   Although not the focus of this paper, Quality of Service is a key
+   component of any VPN service.  In MPLS/BGP VPNs, existing L3 QoS
+   capabilities can be applied to labeled packets through the use of the
+   "experimental" bits in the shim header [MPLS-ENCAPS], or, where ATM
+   is used as the backbone, through the use of ATM QoS capabilities.
+   The traffic engineering work discussed in [MPLS-RSVP] is also
+   directly applicable to MPLS/BGP VPNs.  Traffic engineering could even
+   be used to establish label switched paths with particular QoS
+   characteristics between particular pairs of sites, if that is
+   desirable.  Where an MPLS/BGP VPN spans multiple SPs, the
+   architecture described in [PASTE] may be useful.  An SP may apply
+   either intserv (Integrated Services) or diffserv (Differentiated
+   Services) capabilities to a particular VPN, as appropriate.
+
+
+
+
+
+
+
+
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 39]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+15.  Scalability
+
+   We have discussed scalability issues throughout this paper.  In this
+   section, we briefly summarize the main characteristics of our model
+   with respect to scalability.
+
+   The Service Provider backbone network consists of (a) PE routers, (b)
+   BGP Route Reflectors, (c) P routers (that are neither PE routers nor
+   Route Reflectors), and, in the case of multi-provider VPNs, (d)
+   ASBRs.
+
+   P routers do not maintain any VPN routes.  In order to properly
+   forward VPN traffic, the P routers need only maintain routes to the
+   PE routers and the ASBRs.  The use of two levels of labeling is what
+   makes it possible to keep the VPN routes out of the P routers.
+
+   A PE router maintains VPN routes, but only for those VPNs to which it
+   is directly attached.
+
+   Route reflectors can be partitioned among VPNs so that each partition
+   carries routes for only a subset of the VPNs supported by the Service
+   Provider.  Thus, no single route reflector is required to maintain
+   routes for all VPNs.
+
+   For inter-provider VPNs, if the ASBRs maintain and distribute VPN-
+   IPv4 routes, then the ASBRs can be partitioned among VPNs in a
+   similar manner, with the result that no single ASBR is required to
+   maintain routes for all the inter-provider VPNs.  If multi-hop EBGP
+   is used, then the ASBRs need not maintain and distribute VPN-IPv4
+   routes at all.
+
+   As a result, no single component within the Service Provider network
+   has to maintain all the routes for all the VPNs.  So the total
+   capacity of the network to support increasing numbers of VPNs is not
+   limited by the capacity of any individual component.
+
+16.  IANA Considerations
+
+   The Internet Assigned Numbers Authority (IANA) has created a new
+   registry for the "Route Distinguisher Type Field" (see Section 4.2).
+   This is a two-byte field.  Types 0, 1, and 2 are defined by this
+   document.  Additional Route Distinguisher Type Field values with a
+   high-order bit of 0 may be allocated by IANA on a "First Come, First
+   Served" basis [IANA].  Values with a high-order bit of 1 may be
+   allocated by IANA based on "IETF consensus" [IANA].
+
+
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 40]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   This document specifies (see Section 4.3.4) the use of the BGP
+   Address Family Identifier (AFI) value 1, along with the BGP
+   Subsequent Address Family Identifier (SAFI) value 128, to represent
+   the address family "VPN-IPv4 Labeled Addresses", which is defined in
+   this document.
+
+   The use of AFI value 1 for IP is as currently specified in the IANA
+   registry "Address Family Identifier", so IANA need take no action
+   with respect to it.
+
+   The SAFI value 128 was originally specified as "Private Use" in the
+   IANA "Subsequent Address Family Identifier" registry.  IANA has
+   changed the SAFI value 128 from "private use" to "MPLS-labeled VPN
+   address".
+
+17.  Acknowledgements
+
+   The full list of contributors can be found in Section 18.
+
+   Significant contributions to this work have also been made by Ravi
+   Chandra, Dan Tappan, and Bob Thomas.
+
+   We also wish to thank Shantam Biswas for his review and
+   contributions.
+
+18.  Contributors
+
+   Tony Bogovic
+   Telcordia Technologies
+   445 South Street, Room 1A264B
+   Morristown, NJ 07960
+
+   EMail: tjb@research.telcordia.com
+
+
+   Stephen John Brannon
+   Swisscom AG
+   Postfach 1570
+   CH-8301
+   Glattzentrum (Zuerich), Switzerland
+
+   EMail: stephen.brannon@swisscom.com
+
+
+
+
+
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 41]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   Marco Carugi
+   Nortel Networks S.A.
+   Parc d'activites de Magny-Les Jeunes Bois  CHATEAUFORT
+   78928 YVELINES Cedex 9 - FRANCE
+
+   EMail: marco.carugi@nortelnetworks.com
+
+
+   Christopher J. Chase
+   AT&T
+   200 Laurel Ave
+   Middletown, NJ 07748
+   USA
+
+   EMail: chase@att.com
+
+
+   Ting Wo Chung
+   Bell Nexxia
+   181 Bay Street
+   Suite 350
+   Toronto, Ontario
+   M5J2T3
+
+   EMail: ting_wo.chung@bellnexxia.com
+
+
+   Eric Dean
+
+
+   Jeremy De Clercq
+   Alcatel Network Strategy Group
+   Francis Wellesplein 1
+   2018 Antwerp, Belgium
+
+   EMail: jeremy.de_clercq@alcatel.be
+
+
+   Luyuan Fang
+   AT&T
+   IP Backbone Architecture
+   200 Laurel Ave.
+   Middletown, NJ 07748
+
+   EMail: luyuanfang@att.com
+
+
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 42]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   Paul Hitchen
+   BT
+   BT Adastral Park
+   Martlesham Heath,
+   Ipswich IP5 3RE
+   UK
+
+   EMail: paul.hitchen@bt.com
+
+
+   Manoj Leelanivas
+   Juniper Networks, Inc.
+   385 Ravendale Drive
+   Mountain View, CA 94043 USA
+
+   EMail: manoj@juniper.net
+
+
+   Dave Marshall
+   Worldcom
+   901 International Parkway
+   Richardson, Texas 75081
+
+   EMail: dave.marshall@wcom.com
+
+
+   Luca Martini
+   Cisco Systems, Inc.
+   9155 East Nichols Avenue, Suite 400
+   Englewood, CO, 80112
+
+   EMail: lmartini@cisco.com
+
+
+   Monique Jeanne Morrow
+   Cisco Systems, Inc.
+   Glatt-com, 2nd floor
+   CH-8301
+   Glattzentrum, Switzerland
+
+   EMail: mmorrow@cisco.com
+
+
+
+
+
+
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 43]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   Ravichander Vaidyanathan
+   Telcordia Technologies
+   445 South Street, Room 1C258B
+   Morristown, NJ 07960
+
+   EMail: vravi@research.telcordia.com
+
+
+   Adrian Smith
+   BT
+   BT Adastral Park
+   Martlesham Heath,
+   Ipswich IP5 3RE
+   UK
+
+   EMail: adrian.ca.smith@bt.com
+
+
+   Vijay Srinivasan
+   1200 Bridge Parkway
+   Redwood City, CA 94065
+
+   EMail: vsriniva@cosinecom.com
+
+
+   Alain Vedrenne
+   Equant
+   Heraklion, 1041 route des Dolines, BP347
+   06906 Sophia Antipolis, Cedex, France
+
+   EMail: Alain.Vedrenne@equant.com
+
+19.  Normative References
+
+   [BGP]             Rekhter, Y. and T. Li, "A Border Gateway Protocol 4
+                     (BGP-4)", RFC 4271, January 2006.
+
+   [BGP-MP]          Bates, T., Rekhter, Y., Chandra, R., and D. Katz,
+                     "Multiprotocol Extensions for BGP-4", RFC 2858,
+                     June 2000.
+
+   [BGP-EXTCOMM]     Sangli, S., Tappan, D., and Y. Rekhter, "BGP
+                     Extended Communities Attribute", RFC 4360, February
+                     2006.
+
+   [MPLS-ARCH]       Rosen, E., Viswanathan, A., and R. Callon,
+                     "Multiprotocol Label Switching Architecture", RFC
+                     3031, January 2001.
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 44]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   [MPLS-BGP]        Rekhter, Y. and E. Rosen, "Carrying Label
+                     Information in BGP-4", RFC 3107, May 2001.
+
+   [MPLS-ENCAPS]     Rosen, E., Tappan, D., Fedorkow, G., Rekhter, Y.,
+                     Farinacci, D., Li, T., and A. Conta, "MPLS Label
+                     Stack Encoding", RFC 3032, January 2001.
+
+20.  Informative References
+
+   [BGP-AS4]         Vohra, Q. and E. Chen, "BGP Support for Four-Octet
+                     AS Number Space", Work in Progress, March 2004.
+
+   [BGP-ORF]         Chen, E. and Y. Rekhter, "Cooperative Route
+                     Filtering Capability for BGP-4", Work in Progress,
+                     March 2004.
+
+   [BGP-RFSH]        Chen, E., "Route Refresh Capability for BGP-4", RFC
+                     2918, September 2000.
+
+   [BGP-RR]          Bates, T., Chandra, R., and E. Chen, "BGP Route
+                     Reflection - An Alternative to Full Mesh IBGP", RFC
+                     2796, April 2000.
+
+   [IANA]            Narten, T. and H. Alvestrand, "Guidelines for
+                     Writing an IANA Considerations Section in RFCs",
+                     BCP 26, RFC 2434, October 1998.
+
+   [MPLS-ATM]        Davie, B., Lawrence, J., McCloghrie, K., Rosen, E.,
+                     Swallow, G., Rekhter, Y., and P. Doolan, "MPLS
+                     using LDP and ATM VC Switching", RFC 3035, January
+                     2001.
+
+   [MPLS/BGP-IPsec]  Rosen, E., De Clercq, J., Paridaens, O., T'Joens,
+                     Y., and C. Sargor, "Architecture for the Use of
+                     PE-PE IPsec Tunnels in BGP/MPLS IP VPNs", Work in
+                     Progress, March 2004.
+
+   [MPLS-FR]         Conta, A., Doolan, P., and A. Malis, "Use of Label
+                     Switching on Frame Relay Networks Specification",
+                     RFC 3034, January 2001.
+
+   [MPLS-in-IP-GRE]  Worster, T., Rekhter, Y., and E. Rosen,
+                     "Encapsulating MPLS in IP or Generic Routing
+                     Encapsulation (GRE)", RFC 4023, March 2005.
+
+   [MPLS-LDP]        Andersson, L., Doolan, P., Feldman, N., Fredette,
+                     A., and B. Thomas, "LDP Specification", RFC 3036,
+                     January 2001.
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 45]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+   [MPLS-RSVP]       Awduche, D., Berger, L., Gan, D., Li, T.,
+                     Srinivasan, V., and G. Swallow, "RSVP-TE:
+                     Extensions to RSVP for LSP Tunnels", RFC 3209,
+                     December 2001.
+
+   [OSPFv2]          Moy, J., "OSPF Version 2", STD 54, RFC 2328, April
+                     1998.
+
+   [PASTE]           Li, T. and Y. Rekhter, "A Provider Architecture for
+                     Differentiated Services and Traffic Engineering
+                     (PASTE)", RFC 2430, October 1998.
+
+   [RIP]             Malkin, G., "RIP Version 2", STD 56, RFC 2453,
+                     November 1998.
+
+   [OSPF-2547-DNBIT] Rosen, E., Psenak, P., and P. Pillay-Esnault,
+                     "Using an LSA Options Bit to Prevent Looping in
+                     BGP/MPLS IP VPNs", Work in Progress, March 2004.
+
+   [TCP-MD5]         Heffernan, A., "Protection of BGP Sessions via the
+                     TCP MD5 Signature Option", RFC 2385, August 1998.
+
+   [VPN-MCAST]       Rosen, E., Cai, Y., and J. Wijsnands, "Multicast in
+                     MPLS/BGP VPNs", Work in Progress, May 2004.
+
+   [VPN-OSPF]        Rosen, E., Psenak, P., and P. Pillay-Esnault, "OSPF
+                     as the PE/CE Protocol in BGP/MPLS VPNs", Work in
+                     Progress, February 2004.
+
+Authors' Addresses
+
+   Eric C. Rosen
+   Cisco Systems, Inc.
+   1414 Massachusetts Avenue
+   Boxborough, MA 01719
+
+   EMail: erosen@cisco.com
+
+
+   Yakov Rekhter
+   Juniper Networks
+   1194 N. Mathilda Avenue
+   Sunnyvale, CA 94089
+
+   EMail: yakov@juniper.net
+
+
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 46]
+
+RFC 4364                    BGP/MPLS IP VPNs               February 2006
+
+
+Full Copyright Statement
+
+   Copyright (C) The Internet Society (2006).
+
+   This document is subject to the rights, licenses and restrictions
+   contained in BCP 78, and except as set forth therein, the authors
+   retain all their rights.
+
+   This document and the information contained herein are provided on an
+   "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS
+   OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET
+   ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED,
+   INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE
+   INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
+   WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Intellectual Property
+
+   The IETF takes no position regarding the validity or scope of any
+   Intellectual Property Rights or other rights that might be claimed to
+   pertain to the implementation or use of the technology described in
+   this document or the extent to which any license under such rights
+   might or might not be available; nor does it represent that it has
+   made any independent effort to identify any such rights.  Information
+   on the procedures with respect to rights in RFC documents can be
+   found in BCP 78 and BCP 79.
+
+   Copies of IPR disclosures made to the IETF Secretariat and any
+   assurances of licenses to be made available, or the result of an
+   attempt made to obtain a general license or permission for the use of
+   such proprietary rights by implementers or users of this
+   specification can be obtained from the IETF on-line IPR repository at
+   http://www.ietf.org/ipr.
+
+   The IETF invites any interested party to bring to its attention any
+   copyrights, patents or patent applications, or other proprietary
+   rights that may cover technology that may be required to implement
+   this standard.  Please address the information to the IETF at
+   ietf-ipr@ietf.org.
+
+Acknowledgement
+
+   Funding for the RFC Editor function is provided by the IETF
+   Administrative Support Activity (IASA).
+
+
+
+
+
+
+
+Rosen & Rekhter             Standards Track                    [Page 47]
+
diff --git a/monitor/monitor_backend.go b/monitor/monitor_backend.go
@@ -0,0 +1,22 @@
+package monitor
+
+import (
+	//"log"
+	"github.com/gocql/gocql"
+)
+
+// CassandraContext bundles the gocql cluster configuration with the session
+// created from it. Both fields are populated by StartCassandra.
+type CassandraContext struct {
+	// cconfig is the cluster configuration built from the addresses and
+	// keyspace handed to StartCassandra.
+	cconfig *gocql.ClusterConfig
+	// session is the session opened by StartCassandra and closed by
+	// StopCassandra.
+	session *gocql.Session
+}
+
+// StartCassandra creates a cluster configuration for the given addresses,
+// selects kspace as its keyspace and opens a session on it. The configuration
+// and the result of the session creation are stored on c; the error from the
+// session creation, if any, is returned.
+func (c *CassandraContext) StartCassandra(kspace string, addrs ...string) (err error) {
+	cluster := gocql.NewCluster(addrs...)
+	cluster.Keyspace = kspace
+	c.cconfig = cluster
+	c.session, err = cluster.CreateSession()
+	return err
+}
+
+// StopCassandra closes the session held by c. It does nothing when c holds no
+// session (for example when StartCassandra was never called), so it can be
+// called unconditionally, including from a defer.
+func (c *CassandraContext) StopCassandra() {
+	if c.session == nil {
+		return
+	}
+	c.session.Close()
+}
diff --git a/monitor/monitor_backend_test.go b/monitor/monitor_backend_test.go
@@ -0,0 +1,32 @@
+package monitor
+
+import (
+	"testing"
+)
+
+// conerr receives the result of StartCassandra in each test of this file.
+var (
+	conerr error
+)
+
+// TestStartStop opens and closes a session against the test Cassandra
+// instance. When the instance cannot be reached the test is reported as
+// skipped rather than as passed, so a missing backend stays visible in the
+// test output without failing the run.
+func TestStartStop(t *testing.T) {
+	c := &CassandraContext{}
+	conerr = c.StartCassandra("bgp_mongol_test", "worf.netsec.colostate.edu")
+	if conerr != nil {
+		t.Skipf("could not connect to test cassandra instance at worf.netsec.colostate.edu: %v", conerr)
+	}
+	c.StopCassandra()
+}
+
+// TestCreateTable issues a CREATE TABLE statement against the test Cassandra
+// instance. When the instance cannot be reached the test is reported as
+// skipped. The session is closed through a defer so it is released on every
+// exit path.
+func TestCreateTable(t *testing.T) {
+	c := &CassandraContext{}
+	conerr = c.StartCassandra("bgp_mongol_test", "worf.netsec.colostate.edu")
+	if conerr != nil {
+		t.Skipf("could not connect to test cassandra instance at worf.netsec.colostate.edu: %v", conerr)
+	}
+	defer c.StopCassandra()
+	// A failure here is only logged, as before. NOTE(review): presumably
+	// because the table may already exist from an earlier run - confirm.
+	if err := c.session.Query("CREATE TABLE bmf ( prefix text PRIMARY KEY, dat1 text, dat2 text);").Exec(); err != nil {
+		t.Logf("create table returned error: %v", err)
+	}
+}
diff --git a/mrt/mrt.go b/mrt/mrt.go
@@ -0,0 +1,377 @@
+package mrt
+
+import (
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"net"
+	"os"
+	"unicode/utf8"
+	//"runtime"
+)
+
+// logger is the package logger. It writes to standard error, prefixes every
+// line with "go-mrt: " and adds the date and the full file name and line of
+// the call site.
+var logger = log.New(os.Stderr, "go-mrt: ", log.Ldate|log.Llongfile)
+
+// MrtHdr is the fixed header at the start of every MRT record. The fields are
+// declared in wire order with their wire widths (MrtHdr_size bytes in total),
+// which is what lets NewMrtHdr decode the whole struct with a single
+// big-endian binary.Read.
+type MrtHdr struct {
+	Mrt_timestamp uint32
+	// Mrt_type is compared against the MSG_* constants by PFunc.
+	Mrt_type      uint16
+	// Mrt_subtype is interpreted relative to Mrt_type (see PFunc).
+	Mrt_subtype   uint16
+	// Mrt_len is the number of payload bytes that follow the header; the
+	// header itself is not included (SplitMrt and Read add MrtHdr_size).
+	Mrt_len       uint32
+}
+
+// parsefunc decodes the payload bytes of one MRT record into its
+// type-specific header. Instances are handed out by MrtMsg.PFunc.
+type parsefunc func([]byte) MrtSubTyper
+
+// MrtMsg is one MRT record: the common header plus the raw bytes that follow
+// it.
+type MrtMsg struct {
+	Hdr    MrtHdr
+	// BGPMsg holds the record payload. Despite the name it is also what
+	// callers pass to the OSPF and informational parsers returned by PFunc.
+	BGPMsg []byte
+}
+
+// MrtSubTyper is implemented by every type-specific header a parsefunc can
+// return.
+type MrtSubTyper interface {
+	// Type returns a name for the kind of header.
+	Type() string //almost dummy functionality
+	// String returns a printable description of the header.
+	String() string
+}
+
+// MrtOSPFHdr is the header produced by the OSPF parser in PFunc.
+type MrtOSPFHdr struct {
+	// otype is the MRT subtype of the record the header was parsed from.
+	otype    uint16
+	// RemoteIP and LocalIP are the first and second 32-bit words of the
+	// payload, decoded as big-endian integers.
+	RemoteIP uint32
+	LocalIP  uint32
+}
+
+// Type returns the fixed name "OSPFHdr".
+func (m *MrtOSPFHdr) Type() string {
+	return "OSPFHdr"
+}
+
+// String renders the OSPF subtype together with the remote and local
+// addresses in dotted-quad form.
+//
+// RemoteIP and LocalIP are decoded from the payload as big-endian integers,
+// so the first octet on the wire is the most significant byte of the integer.
+// The addresses are therefore written back out in big-endian order; emitting
+// the low byte first would print every address with its octets reversed.
+func (m *MrtOSPFHdr) String() string {
+	remip := make(net.IP, 4)
+	locip := make(net.IP, 4)
+	binary.BigEndian.PutUint32(remip, m.RemoteIP)
+	binary.BigEndian.PutUint32(locip, m.LocalIP)
+	return fmt.Sprintf("OSPF Header. Type [%d] Remote IP [%s] Local IP [%s]", m.otype, remip, locip)
+}
+
+// MrtInfoMsg is the header produced by the informational-message parser in
+// PFunc.
+type MrtInfoMsg struct {
+	// inftype is the MRT type of the record the message was parsed from.
+	inftype uint16
+	// optmsg is the optional text carried in the payload, or the placeholder
+	// "No Optional Message" when the payload is empty or not valid UTF-8.
+	optmsg  string
+}
+
+// String renders the message type and its optional text.
+func (m *MrtInfoMsg) String() string {
+	return fmt.Sprintf("Informational Message. Type [%v] Optstring [%s]", m.inftype, m.optmsg)
+}
+
+// Type returns the same text as String.
+func (m *MrtInfoMsg) Type() string {
+	return m.String()
+}
+
+// PFunc returns the parser that decodes the payload of m, chosen from the
+// type and subtype in m.Hdr. ok is false, and ret nil, when the combination
+// is deprecated, malformed or not supported; the reason is logged except for
+// an unsupported BGP4MP subtype, which is rejected silently.
+//
+// The returned parsers cannot report errors through their signature:
+//   - the OSPF parser panics when the payload is shorter than its two
+//     addresses;
+//   - the BGP4MP parsers panic when the peer AS number cannot be read, and
+//     the BGP4MP message parser additionally panics on an address family
+//     other than 1 (4-byte addresses) or 2 (16-byte addresses);
+//   - any other short read is logged and the fields that could not be read
+//     keep their zero value.
+func (m *MrtMsg) PFunc() (ret parsefunc, ok bool) {
+	subtype := m.Hdr.Mrt_subtype
+	mtype := m.Hdr.Mrt_type
+
+	// asnSize is the width in bytes of the AS number fields of a BGP4MP
+	// header. Only the *_AS4 subtypes carry 4-byte AS numbers. The _LOCAL
+	// subtypes share the layout of their non-local counterparts and must be
+	// sized here as well: leaving their buffers empty makes every following
+	// field decode from the wrong offset.
+	asnSize := 2
+	switch subtype {
+	case BGP4MP_MESSAGE_AS4, BGP4MP_STATE_CHANGE_AS4, BGP4MP_MESSAGE_AS4_LOCAL:
+		asnSize = 4
+	}
+
+	// ipSize maps an address family to the byte length of its addresses and
+	// returns 0 for a family this package does not know.
+	ipSize := func(afi uint16) int {
+		switch afi {
+		case 1:
+			return 4
+		case 2:
+			return 16
+		}
+		return 0
+	}
+
+	// fieldReader returns a big-endian field decoder over r that records the
+	// first failure in *rerr and turns every later call into a no-op. After
+	// a short read nothing more can be decoded, so the remaining fields
+	// simply keep their zero value.
+	fieldReader := func(r io.Reader, rerr *error) func(interface{}) {
+		return func(dst interface{}) {
+			if *rerr == nil {
+				*rerr = binary.Read(r, binary.BigEndian, dst)
+			}
+		}
+	}
+
+	infofunc := func(a []byte) MrtSubTyper {
+		infomsg := &MrtInfoMsg{inftype: mtype, optmsg: "No Optional Message"}
+		runes := make([]rune, 0, len(a))
+		for len(a) > 0 {
+			r, sz := utf8.DecodeRune(a)
+			if r == utf8.RuneError {
+				logger.Println("failed to decode rune in optional message")
+				return infomsg
+			}
+			a = a[sz:]
+			runes = append(runes, r)
+		}
+		if len(runes) > 0 {
+			infomsg.optmsg = string(runes)
+		}
+		return infomsg
+	}
+
+	ospffunc := func(a []byte) MrtSubTyper {
+		hdr := &MrtOSPFHdr{otype: subtype}
+		var rerr error
+		read := fieldReader(bytes.NewReader(a), &rerr)
+		read(&hdr.RemoteIP)
+		read(&hdr.LocalIP)
+		if rerr != nil {
+			panic(fmt.Sprintf("error while reading binary OSPF header: %s", rerr))
+		}
+		return hdr
+	}
+
+	bgp4mpscfunc := func(a []byte) MrtSubTyper {
+		hdr := &MrtBGP4MPStateChangeHdr{
+			PeerASN:  make([]byte, asnSize),
+			LocalASN: make([]byte, asnSize),
+		}
+		var rerr error
+		read := fieldReader(bytes.NewReader(a), &rerr)
+		read(hdr.PeerASN)
+		if rerr != nil {
+			panic(fmt.Sprintf("error while reading binary BGP4MP header: %s", rerr))
+		}
+		read(hdr.LocalASN)
+		read(&hdr.InterfaceInd)
+		read(&hdr.AddrFamily)
+		if n := ipSize(hdr.AddrFamily); n > 0 {
+			hdr.PeerIP = make([]byte, n)
+			hdr.LocalIP = make([]byte, n)
+			read(hdr.PeerIP)
+			read(hdr.LocalIP)
+			read(&hdr.OldState)
+			read(&hdr.NewState)
+		} else if rerr == nil {
+			// Without a known address size the state fields cannot be
+			// located, so they are left at zero instead of being filled
+			// with bytes that belong to the addresses.
+			logger.Printf("unexpected address family [%d] in BGP4MP state change header\n", hdr.AddrFamily)
+		}
+		if rerr != nil {
+			logger.Printf("truncated BGP4MP state change header: %s\n", rerr)
+		}
+		return hdr
+	}
+
+	bgp4mpmsgfunc := func(a []byte) MrtSubTyper {
+		hdr := &MrtBGP4MPMsgHdr{
+			PeerASN:  make([]byte, asnSize),
+			LocalASN: make([]byte, asnSize),
+		}
+		var rerr error
+		read := fieldReader(bytes.NewReader(a), &rerr)
+		read(hdr.PeerASN)
+		if rerr != nil {
+			panic(fmt.Sprintf("error while reading binary BGP4MP header: %s", rerr))
+		}
+		read(hdr.LocalASN)
+		read(&hdr.InterfaceInd)
+		read(&hdr.AddrFamily)
+		n := ipSize(hdr.AddrFamily)
+		if n == 0 {
+			panic("Address Family in BGP4MP msg func is wrong")
+		}
+		hdr.PeerIP = make([]byte, n)
+		hdr.LocalIP = make([]byte, n)
+		read(hdr.PeerIP)
+		read(hdr.LocalIP)
+		if rerr != nil {
+			logger.Printf("truncated BGP4MP message header: %s\n", rerr)
+		}
+		return hdr
+	}
+
+	switch mtype {
+	case MSG_PROTOCOL_BGP4MP:
+		switch subtype {
+		case BGP4MP_STATE_CHANGE, BGP4MP_STATE_CHANGE_AS4:
+			return bgp4mpscfunc, true
+		case BGP4MP_MESSAGE, BGP4MP_MESSAGE_AS4, BGP4MP_MESSAGE_LOCAL, BGP4MP_MESSAGE_AS4_LOCAL:
+			return bgp4mpmsgfunc, true
+		}
+	case MSG_START, MSG_I_AM_DEAD:
+		if subtype == 0 {
+			return infofunc, true
+		}
+		logger.Println("Mrt type is Informational but Subtype non-zero")
+	case MSG_PROTOCOL_OSPF:
+		if subtype == 0 || subtype == 1 {
+			return ospffunc, true
+		}
+		logger.Println("Mrt type is OSPF but Subtype is neither 0 or 1")
+	case MSG_NULL, MSG_DIE, MSG_PEER_DOWN, MSG_PROTOCOL_BGP, MSG_PROTOCOL_IDRP, MSG_PROTOCOL_BGP4PLUS, MSG_PROTOCOL_BGP4PLUS1:
+		logger.Println("Deprecated message type")
+	default:
+		logger.Printf("unknown. header [%v]\n", m.Hdr)
+	}
+	return nil, false
+}
+
+// MrtBGP4MPStateChangeHdr is the header produced by the BGP4MP state-change
+// parser in PFunc. Fields are listed in the order they are read from the
+// payload.
+type MrtBGP4MPStateChangeHdr struct {
+	// PeerASN and LocalASN hold the raw AS number bytes: 2 bytes for
+	// BGP4MP_STATE_CHANGE, 4 bytes otherwise.
+	PeerASN      []byte
+	LocalASN     []byte
+	InterfaceInd uint16
+	// AddrFamily selects the address size: 1 means 4-byte addresses, 2 means
+	// 16-byte addresses.
+	AddrFamily   uint16
+	// PeerIP and LocalIP hold the raw address bytes and stay nil when
+	// AddrFamily is neither 1 nor 2.
+	PeerIP       []byte
+	LocalIP      []byte
+	OldState     uint16
+	NewState     uint16
+}
+
+// Type returns the fixed name "BGP4MPStateChange".
+func (m *MrtBGP4MPStateChangeHdr) Type() string {
+	return "BGP4MPStateChange"
+}
+
+// String returns the fixed name "BGP4MPStateChange"; none of the fields are
+// rendered.
+func (m *MrtBGP4MPStateChangeHdr) String() string {
+	return "BGP4MPStateChange"
+}
+
+// MrtBGP4MPMsgHdr is the header produced by the BGP4MP message parser in
+// PFunc. Fields are listed in the order they are read from the payload.
+type MrtBGP4MPMsgHdr struct {
+	// PeerASN and LocalASN hold the raw AS number bytes; their length is
+	// chosen by the parser from the MRT subtype.
+	PeerASN      []byte
+	LocalASN     []byte
+	InterfaceInd uint16
+	// AddrFamily selects the address size: 1 means 4-byte addresses, 2 means
+	// 16-byte addresses. The parser panics on any other value.
+	AddrFamily   uint16
+	// PeerIP and LocalIP hold the raw address bytes.
+	PeerIP       []byte
+	LocalIP      []byte
+}
+
+// Type returns the fixed name "BGP4MPMsg".
+func (m *MrtBGP4MPMsgHdr) Type() string {
+	return "BGP4MPMsg"
+}
+
+// String renders the local and the remote (peer) address of the header.
+//
+// Each label is paired with the matching field, and the addresses are
+// formatted from all of their bytes, so a 16-byte address is printed as an
+// IPv6 address instead of as an IPv4 address built from its first four bytes.
+// A header whose addresses were never filled in yields a fixed diagnostic
+// string.
+func (m *MrtBGP4MPMsgHdr) String() string {
+	if len(m.PeerIP) < 4 || len(m.LocalIP) < 4 {
+		return "BGP4MPMsg unable to read IPs"
+	}
+	return fmt.Sprintf("LocalIP:%s RemoteIP:%s", net.IP(m.LocalIP), net.IP(m.PeerIP))
+}
+
+// MrtTableDumpV1Hdr describes a table-dump entry header. PFunc does not
+// return a parser that fills it in.
+type MrtTableDumpV1Hdr struct {
+	ViewNum   uint16
+	SeqNum    uint16
+	Prefix    []byte
+	PrefixLen uint8
+	Status    uint8
+	OrigTime  uint32
+	PeerIP    []byte
+	PeerAS    uint16
+	AttrLen   uint16
+}
+
+// Type returns the fixed name "TableDumpV1Hdr".
+func (m *MrtTableDumpV1Hdr) Type() string {
+	return "TableDumpV1Hdr"
+}
+
+// String returns the fixed name "TableDumpV1Hdr"; none of the fields are
+// rendered.
+func (m *MrtTableDumpV1Hdr) String() string {
+	return "TableDumpV1Hdr"
+}
+
+// MrtFile reads MRT records one at a time from an underlying reader (see
+// Read). Create one with NewMrtFile.
+type MrtFile struct {
+	// file is the source the records are read from.
+	file    io.Reader
+	// entries counts the record headers Read has decoded so far.
+	entries uint32
+	// off is the number of bytes consumed by the Read calls that completed
+	// successfully.
+	off     int64
+}
+
+// Record size constants.
+const (
+	// MrtHdr_size is the size in bytes of the common MRT header (MrtHdr).
+	MrtHdr_size = 12
+	// dump_size is the record size, header included, that MrtFile.Read
+	// checks records against.
+	dump_size   = 10000
+)
+
+// MRT type constants: the values carried in MrtHdr.Mrt_type. Values 0 to 13
+// are consecutive; the remaining ones are assigned explicitly.
+const (
+	MSG_NULL               = iota //  0 empty msg (deprecated)
+	MSG_START                     //  1 sender is starting up
+	MSG_DIE                       //  2 receiver should shut down (deprecated)
+	MSG_I_AM_DEAD                 //  3 sender is shutting down
+	MSG_PEER_DOWN                 //  4 sender's peer is down (deprecated)
+	MSG_PROTOCOL_BGP              //  5 msg is a BGP packet (deprecated)
+	MSG_PROTOCOL_RIP              //  6 msg is a RIP packet
+	MSG_PROTOCOL_IDRP             //  7 msg is an IDRP packet (deprecated)
+	MSG_PROTOCOL_RIPNG            //  8 msg is a RIPNG packet
+	MSG_PROTOCOL_BGP4PLUS         //  9 msg is a BGP4+ packet (deprecated)
+	MSG_PROTOCOL_BGP4PLUS1        // 10 msg is a BGP4+ (draft 01) (deprecated)
+	MSG_PROTOCOL_OSPF             // 11 msg is an OSPF packet
+	MSG_TABLE_DUMP                // 12 routing table dump
+	MSG_TABLE_DUMP_V2             // 13 routing table dump
+	MSG_PROTOCOL_BGP4MP    = 16   // 16 zebra's own packet format
+	MSG_PROTOCOL_BGP4MP_ET = 17
+	MSG_PROTOCOL_ISIS      = 32 // 32 msg is an ISIS packet
+	MSG_PROTOCOL_ISIS_ET   = 33
+	MSG_PROTOCOL_OSPFV3    = 48 // 48 msg is an OSPFv3 packet
+	MSG_PROTOCOL_OSPFV3_ET = 49
+)
+
+// MRT subtype constants for records of type MSG_PROTOCOL_BGP4MP: the values
+// carried in MrtHdr.Mrt_subtype. PFunc uses them to pick between the
+// state-change parser and the message parser.
+const (
+	BGP4MP_STATE_CHANGE      = 0 // state change
+	BGP4MP_MESSAGE           = 1 // bgp message
+	BGP4MP_MESSAGE_AS4       = 4 // same as BGP4MP_MESSAGE with 4byte AS
+	BGP4MP_STATE_CHANGE_AS4  = 5
+	BGP4MP_MESSAGE_LOCAL     = 6 // same as BGP4MP_MESSAGE but for self
+	BGP4MP_MESSAGE_AS4_LOCAL = 7 // originated updates. Not implemented
+)
+
+// Subtype values 0 and 1 for OSPF records. PFunc accepts exactly these two
+// subtypes for MSG_PROTOCOL_OSPF (it compares against the literals).
+const (
+	OSPF_STATE_CHANGE = iota
+	OSPF_LSA_UPDATE
+)
+
+// NewMrtHdr decodes the common MRT header from the start of b, reading every
+// field as big-endian. It returns the error reported by binary.Read when b
+// holds fewer bytes than the header needs.
+func NewMrtHdr(b []byte) (ret MrtHdr, err error) {
+	err = binary.Read(bytes.NewReader(b), binary.BigEndian, &ret)
+	return ret, err
+}
+
+// NewMrtFile wraps f in an MrtFile whose entry counter and offset start at
+// zero.
+func NewMrtFile(f io.Reader) (ret MrtFile) {
+	return MrtFile{file: f}
+}
+
+// SplitMrt is a split function for bufio.Scanner: pass it to Scanner.Split to
+// receive one complete MRT record (common header plus payload) per token.
+//
+// It asks the scanner for more data until data holds a full header and the
+// payload length that header announces. It does so even when atEOF is true,
+// so a truncated trailing record produces neither a token nor an error.
+func SplitMrt(data []byte, atEOF bool) (advance int, token []byte, err error) {
+	if atEOF && len(data) == 0 {
+		return 0, nil, nil
+	}
+	// The test must use len, not cap: the bytes between len(data) and
+	// cap(data) have not been read from the input yet, and decoding them
+	// would interpret stale buffer contents as a header.
+	if len(data) < MrtHdr_size { // read more
+		return 0, nil, nil
+	}
+	//this reads the data and (they are big endian so it handles that)
+	hdr, errh := NewMrtHdr(data[:MrtHdr_size])
+	if errh != nil {
+		return 0, nil, errh
+	}
+	// Widen before adding: in uint32 arithmetic a length close to the
+	// maximum wraps around and yields a total smaller than the header.
+	totlen := int64(hdr.Mrt_len) + MrtHdr_size
+	if int64(len(data)) < totlen { //need to read more
+		return 0, nil, nil
+	}
+	// totlen is bounded by len(data) here, so it fits in an int.
+	n := int(totlen)
+	return n, data[:n], nil
+}
+
+// Read reads the next MRT record - the common header followed by the payload
+// whose length the header announces - from the underlying reader into b, and
+// returns the number of bytes stored in b.
+//
+// NOTE: the record is stored in b[:n] based on cap(b), not len(b), so n may
+// exceed len(b); callers are expected to pass a buffer with enough capacity
+// for the largest record.
+//
+// Errors:
+//   - the error of the underlying reader when the header cannot be read
+//     completely (io.EOF when the input ends exactly on a record boundary);
+//   - an error when cap(b) is smaller than a header, when the record reaches
+//     dump_size or does not fit in b, or when the payload is cut short. In
+//     the latter cases n is the number of bytes read before the failure.
+func (f *MrtFile) Read(b []byte) (n int, err error) {
+	if cap(b) < MrtHdr_size {
+		return 0, errors.New("buffer size less than header size")
+	}
+	// io.ReadFull keeps reading until the header is complete; a single Read
+	// call may legally return fewer bytes with a nil error.
+	n, err = io.ReadFull(f.file, b[:MrtHdr_size])
+	if err != nil {
+		return n, err
+	}
+	hdr, errh := NewMrtHdr(b[:MrtHdr_size])
+	if errh != nil {
+		return n, fmt.Errorf("error in reading header from offset %v : %s", f.off, errh)
+	}
+	f.entries++
+	// Computed in int64: the same expression in uint32 wraps around, which
+	// made the previous "difference <= 0" size test pass for oversized
+	// records and led to an out-of-range slice below.
+	total := int64(hdr.Mrt_len) + MrtHdr_size
+	if total >= dump_size || total > int64(cap(b)) {
+		return n, fmt.Errorf("bgp message of size:%v at offset %v is too large", hdr.Mrt_len, f.off+MrtHdr_size)
+	}
+	nr, errb := io.ReadFull(f.file, b[MrtHdr_size:int(total)])
+	n += nr //header + len of read
+	if errb != nil {
+		return n, fmt.Errorf("error in reading bgp message of size :%v . got :%v bytes: %v", hdr.Mrt_len, nr, errb)
+	}
+	f.off += int64(n)
+	return n, nil
+}
diff --git a/mrt/mrt_test.go b/mrt/mrt_test.go
@@ -0,0 +1,118 @@
+package mrt
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"net"
+	"os"
+	"testing"
+)
+
+func TestMrtHdr(t *testing.T) {
+	buf := new(bytes.Buffer)
+	var tdate, tlen uint32 = 1, 4
+	var ttype, tsubtype uint16 = 2, 3
+	mrt := &MrtHdr{tdate, ttype, tsubtype, tlen}
+	fmt.Printf("date:%v type:%v subtype:%v len:%v\n", tdate, ttype, tsubtype, tlen)
+	binary.Write(buf, binary.BigEndian, mrt)
+	fmt.Printf("binary mrt: %x\n", buf.Bytes())
+	mhdr, err := NewMrtHdr(buf.Bytes())
+	if err != nil {
+		t.Fatal(err)
+	}
+	fmt.Printf("recreating MrtHdr from binary :%+v \n", mhdr)
+}
+
+func TestMrtPFunc(t *testing.T) {
+	var (
+		tt1, ts1 = uint16(1), uint16(0)  //start
+		tt2, ts2 = uint16(3), uint16(1)  //i am dead , but wrong subtype
+		tt3, ts3 = uint16(2), uint16(0)  //deprecated
+		tt4, ts4 = uint16(11), uint16(0) //ospf state change
+		tbuf     = []byte{0, 0, 0, 0, 0, 0, 0, 0}
+		tf       parsefunc
+		ok       bool
+	)
+	//binbuf := new(bytes.Buffer)
+	mrt1 := &MrtMsg{
+		Hdr:    MrtHdr{1, tt1, ts1, 10},
+		BGPMsg: tbuf,
+	}
+	mrt2 := &MrtMsg{
+		Hdr:    MrtHdr{1, tt2, ts2, 10},
+		BGPMsg: tbuf,
+	}
+	mrt3 := &MrtMsg{
+		Hdr:    MrtHdr{1, tt3, ts3, 10},
+		BGPMsg: tbuf,
+	}
+	mrt4 := &MrtMsg{
+		Hdr:    MrtHdr{1, tt4, ts4, 10},
+		BGPMsg: tbuf,
+	}
+	fmt.Println("trying to parse informational message")
+	if tf, ok = mrt1.PFunc(); !ok {
+		t.Fatal("tf should be non nil")
+	}
+	hdr := tf(mrt1.BGPMsg)
+	fmt.Printf("type is :%s\n", hdr.Type())
+	fmt.Println("trying to parse informational message with opt string")
+	mrt1.BGPMsg = []byte{'f', 'o', 'o', ' ', 's', 't', 'r'}
+	mrt1.Hdr.Mrt_type = tt2
+	if tf, ok = mrt1.PFunc(); !ok {
+		t.Fatal("tf should be non nil")
+	}
+	hdr = tf(mrt1.BGPMsg)
+	fmt.Printf("type is :%s\n", hdr.Type())
+	fmt.Println("trying to parse malformed informational message")
+	if tf, ok = mrt2.PFunc(); ok {
+		t.Fatal("this should fail with tf being nil cause subtype is non-0")
+	}
+	fmt.Println("trying to parse deprecated message")
+	if tf, ok = mrt3.PFunc(); ok {
+		t.Fatal("this should fail with tf being nil cause it's deprecated")
+	}
+	fmt.Println("trying to parse OSPF message")
+	//first call to littleendian to come to hostbyteorder and then switch to big
+	binary.BigEndian.PutUint32(mrt4.BGPMsg[:4], binary.LittleEndian.Uint32(net.IPv4(1, 2, 3, 4).To4()))
+	binary.BigEndian.PutUint32(mrt4.BGPMsg[4:], binary.LittleEndian.Uint32(net.IPv4(5, 6, 7, 8).To4()))
+	//binary.Write(binbuf, binary.BigEndian, net.IPv4allsys.To4())
+	//mrt4.BGPMsg = make([]byte,8)
+	//mrt4.BGPMsg = binbuf.Bytes()
+	//copy(mrt4.BGPMsg,binbuf.Bytes())
+	if tf, ok = mrt4.PFunc(); !ok {
+		t.Fatal("this shouldn't fail")
+	}
+	hdr = tf(mrt4.BGPMsg)
+	fmt.Printf("type is :%s .String representation: %s\n", hdr.Type(), hdr)
+}
+
+// TestScan splits a sample MRT file into records with bufio.Scanner and
+// SplitMrt and runs the matching parser, when there is one, over every
+// record. It stops at the first record that announces an empty payload.
+func TestScan(t *testing.T) {
+	fmt.Println("testing the scanner interface")
+	f, err := os.Open("../tests/mrt3")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer f.Close()
+	mrtscanner := bufio.NewScanner(f)
+	mrtscanner.Split(SplitMrt)
+	count := 0
+	for mrtscanner.Scan() {
+		count++
+		dat := mrtscanner.Bytes()
+		// SplitMrt has already decoded this header once, so a failure here
+		// means the token handed out is not a well-formed record.
+		h, err := NewMrtHdr(dat[:MrtHdr_size])
+		if err != nil {
+			t.Fatalf("record %d: %v", count, err)
+		}
+		if h.Mrt_len == 0 {
+			t.Logf("terminating from 0 mrt len")
+			return
+		}
+		mrtmsg := MrtMsg{Hdr: h, BGPMsg: dat[MrtHdr_size:]}
+		if tf, ok := mrtmsg.PFunc(); ok {
+			tf(mrtmsg.BGPMsg)
+		}
+	}
+	// A scanner error is reported but, as before, does not fail the test.
+	if err := mrtscanner.Err(); err != nil {
+		fmt.Printf("error: %s", err)
+	}
+	fmt.Printf("scanned and parsed: %d entries from bufio\n", count)
+}
diff --git a/tests/mrt1 b/tests/mrt1
Binary files differ.
diff --git a/tests/mrt2 b/tests/mrt2
Binary files differ.
diff --git a/tests/mrt3 b/tests/mrt3
Binary files differ.

	go-bgp a collection of golang BGP tools to monitor, archive and serve
	git clone git://git.2f30.org/go-bgp
	Log \| Files \| Refs \| README

A	Makefile	\|	12	++++++++++++
A	README	\|	60	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	archive/archive.go	\|	607	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	bgp.go	\|	1	+
A	cmd/archive_server.go	\|	51	+++++++++++++++++++++++++++++++++++++++++++++++++++
A	doc/draft-ietf-grow-mrt-11.txt	\|	1625	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	doc/rfc1771.txt	\|	3195	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	doc/rfc4360.txt	\|	675	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	doc/rfc4364.txt	\|	2635	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	monitor/monitor_backend.go	\|	22	++++++++++++++++++++++
A	monitor/monitor_backend_test.go	\|	32	++++++++++++++++++++++++++++++++
A	mrt/mrt.go	\|	377	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	mrt/mrt_test.go	\|	118	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	tests/mrt1	\|	0
A	tests/mrt2	\|	0
A	tests/mrt3	\|	0