// Package license classifies a repository's LICENSE text into a known license // for display (badge, category color, canonical link). Detection is heuristic — // it keys off the distinctive phrases each license uses — and degrades to an // "unknown" license rather than guessing. package license import "strings" // License is a classified license for display. type License struct { SPDX string // e.g. "CC-BY-NC-SA-4.0", "MIT" ("" if unknown) Short string // badge text, e.g. "CC BY-NC-SA", "MIT", "AGPL-3.0" Name string // full name URL string // canonical reference Category string // permissive | weak-copyleft | copyleft | cc | public-domain | unknown CC []string // CC clause chips: BY, NC, SA, ND (empty for non-CC) Path string // repo path of the source LICENSE file (set by caller) } // Detect classifies LICENSE file contents. func Detect(b []byte) License { s := strings.ToLower(string(b)) switch { case has(s, "creative commons") || strings.Contains(s, "creativecommons.org/licenses") || strings.Contains(s, "creativecommons.org/publicdomain"): return detectCC(s) case has(s, "gnu affero general public license"): return mk("AGPL-3.0", "AGPL-3.0", "GNU Affero General Public License v3.0", "https://www.gnu.org/licenses/agpl-3.0", "copyleft") case has(s, "gnu lesser general public license"): return mk("LGPL-3.0", "LGPL-3.0", "GNU Lesser General Public License v3.0", "https://www.gnu.org/licenses/lgpl-3.0", "weak-copyleft") case has(s, "gnu general public license"): if has(s, "version 2") { return mk("GPL-2.0", "GPL-2.0", "GNU General Public License v2.0", "https://www.gnu.org/licenses/gpl-2.0", "copyleft") } return mk("GPL-3.0", "GPL-3.0", "GNU General Public License v3.0", "https://www.gnu.org/licenses/gpl-3.0", "copyleft") case has(s, "apache license") && has(s, "version 2.0"): return mk("Apache-2.0", "Apache-2.0", "Apache License 2.0", "https://www.apache.org/licenses/LICENSE-2.0", "permissive") case has(s, "mozilla public license") && has(s, "version 2.0"): return mk("MPL-2.0", "MPL-2.0", "Mozilla Public License 2.0", "https://www.mozilla.org/MPL/2.0/", "weak-copyleft") case has(s, "mit license") || (has(s, "permission is hereby granted, free of charge") && !has(s, "without restriction. ")): return mk("MIT", "MIT", "MIT License", "https://opensource.org/license/mit", "permissive") case has(s, "redistribution and use in source and binary forms"): if has(s, "neither the name") { return mk("BSD-3-Clause", "BSD-3-Clause", "BSD 3-Clause License", "https://opensource.org/license/bsd-3-clause", "permissive") } return mk("BSD-2-Clause", "BSD-2-Clause", "BSD 2-Clause License", "https://opensource.org/license/bsd-2-clause", "permissive") case has(s, "isc license") || has(s, "internet systems consortium"): return mk("ISC", "ISC", "ISC License", "https://opensource.org/license/isc-license-txt", "permissive") case has(s, "this is free and unencumbered software released into the public domain"): return mk("Unlicense", "Unlicense", "The Unlicense", "https://unlicense.org/", "public-domain") } return License{Category: "unknown", Short: "License"} } // detectCC resolves a Creative Commons license, including clause set + version. func detectCC(s string) License { if has(s, "cc0") || has(s, "public domain dedication") { l := mk("CC0-1.0", "CC0", "Creative Commons Zero v1.0 Universal", "https://creativecommons.org/publicdomain/zero/1.0/", "public-domain") return l } // Clauses, from URL token (by-nc-sa) or the human title. clauses := []string{"BY"} // every non-CC0 CC license is at least Attribution if has(s, "noncommercial") || hasToken(s, "nc") { clauses = append(clauses, "NC") } if has(s, "sharealike") || hasToken(s, "sa") { clauses = append(clauses, "SA") } nd := has(s, "noderivatives") || has(s, "no derivative") || hasToken(s, "nd") if nd { clauses = append(clauses, "ND") } ver := ccVersion(s) slug := ccSlug(clauses) // by-nc-sa return License{ SPDX: "CC-" + strings.ToUpper(slug) + "-" + ver, Short: "CC " + ccShort(clauses), Name: "Creative Commons " + ccShort(clauses) + " " + ver, URL: "https://creativecommons.org/licenses/" + slug + "/" + ver + "/", Category: "cc", CC: clauses, } } func ccSlug(clauses []string) string { parts := make([]string, 0, len(clauses)) for _, c := range clauses { parts = append(parts, strings.ToLower(c)) } return strings.Join(parts, "-") } func ccShort(clauses []string) string { return strings.Join(clauses, "-") } func ccVersion(s string) string { for _, v := range []string{"4.0", "3.0", "2.5", "2.0", "1.0"} { if strings.Contains(s, v) { return v } } return "4.0" } func has(s, sub string) bool { return strings.Contains(s, sub) } // hasToken matches a standalone token like "nc" inside a license URL slug // (by-nc-sa) without matching arbitrary substrings. func hasToken(s, tok string) bool { return strings.Contains(s, "-"+tok+"-") || strings.Contains(s, "-"+tok+"/") || strings.Contains(s, "/"+tok+"-") } func mk(spdx, short, name, url, cat string) License { return License{SPDX: spdx, Short: short, Name: name, URL: url, Category: cat} }