// Package camelcase is a micro package to split the words of a camelcase type // string into a slice of words. package camelcase import ( "unicode" "unicode/utf8" ) // Split splits the camelcase word and returns a list of words. It also // supports digits. Both lower camel case and upper camel case are supported. // For more info please check: http://en.wikipedia.org/wiki/CamelCase // // Examples // // "" => [] // "lowercase" => ["lowercase"] // "Class" => ["Class"] // "MyClass" => ["My", "Class"] // "MyC" => ["My", "C"] // "HTML" => ["HTML"] // "PDFLoader" => ["PDF", "Loader"] // "AString" => ["A", "String"] // "SimpleXMLParser" => ["Simple", "XML", "Parser"] // "vimRPCPlugin" => ["vim", "RPC", "Plugin"] // "GL11Version" => ["GL", "11", "Version"] // "99Bottles" => ["99", "Bottles"] // "May5" => ["May", "5"] // "BFG9000" => ["BFG", "9000"] // "BöseÜberraschung" => ["Böse", "Überraschung"] // "Two spaces" => ["Two", " ", "spaces"] // "BadUTF8\xe2\xe2\xa1" => ["BadUTF8\xe2\xe2\xa1"] // // Splitting rules // // 1) If string is not valid UTF-8, return it without splitting as // single item array. // 2) Assign all unicode characters into one of 4 sets: lower case // letters, upper case letters, numbers, and all other characters. // 3) Iterate through characters of string, introducing splits // between adjacent characters that belong to different sets. 
//  4) Iterate through array of split strings, and if a given string
//     is upper case:
//       if subsequent string is lower case:
//         move last character of upper case string to beginning of
//         lower case string
//
// The returned slice is never nil: an empty src yields an empty slice, and
// a src that is not valid UTF-8 yields a one-element slice holding src
// unchanged. The returned words are substrings of src.
func Split(src string) (entries []string) {
	// Invalid UTF-8 cannot be classified rune by rune, so hand it back as is.
	if !utf8.ValidString(src) {
		return []string{src}
	}

	// The four character sets from rule 2; classNone only marks "no rune
	// seen yet" so that the first rune always opens a new run.
	const (
		classNone = iota
		classLower
		classUpper
		classDigit
		classOther
	)

	// run is a maximal sequence of same-class runes. Only its starting byte
	// offset is stored; it ends where the next run starts (or at len(src)).
	type run struct {
		start int
		class int
	}

	// Rule 3: open a new run every time the class changes.
	var runs []run
	prev := classNone
	for i, r := range src {
		class := classOther
		switch {
		case unicode.IsLower(r):
			class = classLower
		case unicode.IsUpper(r):
			class = classUpper
		case unicode.IsDigit(r):
			class = classDigit
		}
		if class != prev {
			runs = append(runs, run{start: i, class: class})
			prev = class
		}
	}

	// Rule 4: an upper case run directly followed by a lower case run gives
	// up its last rune to the lower case run, e.g.
	// "PDFL", "oader" -> "PDF", "Loader". Moving the boundary back by the
	// byte size of that rune does this without copying anything.
	for i := 0; i+1 < len(runs); i++ {
		if runs[i].class == classUpper && runs[i+1].class == classLower {
			_, size := utf8.DecodeLastRuneInString(src[runs[i].start:runs[i+1].start])
			runs[i+1].start -= size
		}
	}

	// Slice the words out of src. A single-rune upper case run that gave its
	// only rune away is now empty and is dropped. make guarantees a non-nil
	// result even when there are no runs at all.
	entries = make([]string, 0, len(runs))
	for i, seg := range runs {
		end := len(src)
		if i+1 < len(runs) {
			end = runs[i+1].start
		}
		if seg.start < end {
			entries = append(entries, src[seg.start:end])
		}
	}
	return entries
}