This is an automated email from the ASF dual-hosted git repository.
sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git
The following commit(s) were added to refs/heads/master by this push:
new 82867766 Add CSP checks
82867766 is described below
commit 828677669d1ce142bac5686c2979c7f08346c081
Author: Sebb <[email protected]>
AuthorDate: Sat Oct 25 17:20:53 2025 +0100
Add CSP checks
---
lib/whimsy/sitestandards.rb | 92 +++++++++++++++++++++++++++++++++++++--------
lib/whimsy/sitewebsite.rb | 22 +++++++++--
2 files changed, 94 insertions(+), 20 deletions(-)
diff --git a/lib/whimsy/sitestandards.rb b/lib/whimsy/sitestandards.rb
index a973a89b..360870cf 100644
--- a/lib/whimsy/sitestandards.rb
+++ b/lib/whimsy/sitestandards.rb
@@ -6,7 +6,7 @@ module SiteStandards
CHECK_TEXT = 'text' # (optional) Regex of <a ...>Text to scan
for</a>, of a.text.downcase.strip
CHECK_CAPTURE = 'capture' # a_href minimal regex to capture - for
license, we capture the link if it points to apache.org somewhere
CHECK_VALIDATE = 'validate' # a_href detailed regex to expect for
compliance; it must point to one of our actual licenses to pass
- CHECK_TYPE = 'type' # true = validation checks href/url; false = checks text node
+ CHECK_TYPE = 'type' # 'href', 'text' or 'message'
CHECK_POLICY = 'policy' # URL to policy statement for this check
CHECK_DOC = 'doc' # Explanation of what the check is looking for
@@ -16,7 +16,7 @@ module SiteStandards
CHECK_TEXT => nil,
CHECK_CAPTURE => nil,
CHECK_VALIDATE => %r{https?://[^.]+\.apache\.org},
- CHECK_TYPE => true,
+ CHECK_TYPE => 'href',
CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#websites',
CHECK_DOC => 'The homepage for any ProjectName must be served from
http://ProjectName.apache.org',
},
@@ -27,7 +27,7 @@ module SiteStandards
CHECK_TEXT => nil,
CHECK_CAPTURE => %r{https?://[^.]+(\.incubator)?\.apache\.org},
CHECK_VALIDATE => %r{https?://[^.]+(\.incubator)?\.apache\.org},
- CHECK_TYPE => true,
+ CHECK_TYPE => 'href',
CHECK_POLICY =>
'https://incubator.apache.org/guides/sites.html#podling_website_requirements',
CHECK_DOC => 'The homepage for any ProjectName must be served from
http://ProjectName(.incubator)?.apache.org',
},
@@ -35,7 +35,7 @@ module SiteStandards
CHECK_TEXT => %r{Incubation is required of all newly accepted projects},
CHECK_CAPTURE => %r{Incubation is required of all newly accepted
projects},
CHECK_VALIDATE => %r{Apache \S+( \S+)?( \([Ii]ncubating\))? is an
effort undergoing [Ii]ncubation at [Tt]he Apache Software Foundation \(ASF\),?
sponsored by the (Apache )?\S+( PMC)?. Incubation is required of all newly
accepted projects until a further review indicates that the infrastructure,
communications, and decision making process have stabilized in a manner
consistent with other successful ASF projects. While incubation status is not
necessarily a reflection of the complet [...]
- CHECK_TYPE => false,
+ CHECK_TYPE => 'text',
CHECK_POLICY =>
'https://incubator.apache.org/guides/branding.html#disclaimers',
CHECK_DOC => 'All Apache Incubator Podling sites must contain the
incubating disclaimer.',
},
@@ -46,7 +46,7 @@ module SiteStandards
CHECK_TEXT => %r{apache|asf|foundation}i,
CHECK_CAPTURE => %r{^(https?:)?//(www\.)?apache\.org/?$},
CHECK_VALIDATE => %r{apache|asf|foundation}i,
- CHECK_TYPE => false,
+ CHECK_TYPE => 'text',
CHECK_POLICY =>
'https://www.apache.org/foundation/marks/pmcs#navigation',
CHECK_DOC => 'All projects must feature some prominent link back to the
main ASF homepage at http://www.apache.org/',
},
@@ -57,7 +57,7 @@ module SiteStandards
CHECK_TEXT => nil,
CHECK_CAPTURE => %r{(events|x)/current-event|event-images},
CHECK_VALIDATE =>
%r{^https?://((www\.)?apache\.org/events/current-event|events\.apache\.org|www\.apachecon\.com/event-images/snippet\.js)},
- CHECK_TYPE => true,
+ CHECK_TYPE => 'href',
CHECK_POLICY => 'https://www.apachecon.com/event-images/',
CHECK_DOC => 'Projects SHOULD include a link to any current
CommunityOverCode event, or to the events.apache.org site, as provided by VP,
Conferences.',
},
@@ -65,7 +65,7 @@ module SiteStandards
CHECK_TEXT => /^license$/,
CHECK_CAPTURE => %r{apache\.org},
CHECK_VALIDATE => %r{^https?://.*apache.org/licenses/?$},
- CHECK_TYPE => true,
+ CHECK_TYPE => 'href',
CHECK_POLICY =>
'https://www.apache.org/foundation/marks/pmcs#navigation',
CHECK_DOC => 'There should be a "License" (*not* "Licenses") navigation
link which points to: http[s]://www.apache.org/licenses[/]. (Do not link to
sub-pages)',
},
@@ -73,7 +73,7 @@ module SiteStandards
CHECK_TEXT => /\A(sponsors|thanks!?|thanks to our sponsors)\z/,
CHECK_CAPTURE => /\A(sponsors|thanks!?|thanks to our sponsors)\z/,
CHECK_VALIDATE =>
%r{^https?://.*apache.org/foundation/(thanks|sponsors)},
- CHECK_TYPE => true,
+ CHECK_TYPE => 'href',
CHECK_POLICY =>
'https://www.apache.org/foundation/marks/pmcs#navigation',
CHECK_DOC => '"Sponsors", "Thanks" or "Thanks to our Sponsors" should
link to: http://www.apache.org/foundation/thanks.html or sponsors.html',
},
@@ -81,7 +81,7 @@ module SiteStandards
CHECK_TEXT => /security/,
CHECK_CAPTURE => /security/,
CHECK_VALIDATE => %r{^(https?://.*apache.org|[^:]*)/.*[Ss]ecurity},
- CHECK_TYPE => true,
+ CHECK_TYPE => 'href',
CHECK_POLICY =>
'https://www.apache.org/foundation/marks/pmcs#navigation',
CHECK_DOC => '"Security" should link to either to a project-specific
page [...], or to the main http://www.apache.org/security/ page.',
},
@@ -89,7 +89,7 @@ module SiteStandards
CHECK_TEXT =>
%r{sponsorship|\bdonate\b|sponsor\sapache|sponsoring\sapache|sponsor},
CHECK_CAPTURE =>
%r{sponsorship|\bdonate\b|sponsor\sapache|sponsoring\sapache|sponsor},
CHECK_VALIDATE => %r{^https?://.*apache.org/foundation/sponsorship},
- CHECK_TYPE => true,
+ CHECK_TYPE => 'href',
CHECK_POLICY =>
'https://www.apache.org/foundation/marks/pmcs#navigation',
CHECK_DOC => '"Sponsorship", "Sponsor Apache", or "Donate" should link
to: http://www.apache.org/foundation/sponsorship.html',
},
@@ -98,7 +98,7 @@ module SiteStandards
CHECK_TEXT => %r{\btrademarks\b},
CHECK_CAPTURE => %r{\btrademarks\b},
CHECK_VALIDATE => %r{trademarks of [Tt]he Apache Software Foundation},
- CHECK_TYPE => false,
+ CHECK_TYPE => 'text',
CHECK_POLICY =>
'https://www.apache.org/foundation/marks/pmcs#attributions',
CHECK_DOC => 'All project or product homepages must feature a prominent
trademark attribution of all applicable Apache trademarks.',
},
@@ -106,7 +106,7 @@ module SiteStandards
CHECK_TEXT => %r{((Copyright|©).*apache|apache.*(Copyright|©))}i,
CHECK_CAPTURE => %r{(Copyright|©)}i,
CHECK_VALIDATE => %r{((Copyright|©).*apache|apache.*(Copyright|©))}i,
- CHECK_TYPE => false,
+ CHECK_TYPE => 'text',
CHECK_POLICY => 'https://www.apache.org/legal/src-headers.html#headers',
CHECK_DOC => 'All website content SHOULD include a copyright notice for
the ASF.',
},
@@ -118,7 +118,7 @@ module SiteStandards
|
\Ahttps?://(?:www\.)?apache\.org/foundation/policies/privacy\.html\z
}ix,
- CHECK_TYPE => true,
+ CHECK_TYPE => 'href',
CHECK_POLICY =>
'https://www.apache.org/foundation/marks/pmcs.html#navigation',
CHECK_DOC => 'All websites must link to the Privacy Policy.',
},
@@ -127,7 +127,7 @@ module SiteStandards
CHECK_TEXT => %r{Found \d+ external resources},
CHECK_CAPTURE => %r{Found \d+ external resources},
CHECK_VALIDATE => %r{Found 0 external resources},
- CHECK_TYPE => false,
+ CHECK_TYPE => 'text',
CHECK_POLICY => 'https://privacy.apache.org/faq/committers.html',
CHECK_DOC => 'Websites must not link to externally hosted resources',
},
@@ -136,10 +136,20 @@ module SiteStandards
CHECK_TEXT => nil,
CHECK_CAPTURE => nil,
CHECK_VALIDATE => %r{.},
- CHECK_TYPE => true,
+ CHECK_TYPE => 'href',
CHECK_POLICY => 'https://www.apache.org/logos/',
CHECK_DOC => 'Projects SHOULD add a copy of their logo to
https://www.apache.org/logos/ to be included in ASF homepage.',
},
+
+ 'csp' => { # Custom: CSP must follow standards
+ CHECK_TEXT => 'Non-standard CSP',
+ CHECK_CAPTURE => nil,
+ CHECK_VALIDATE => :CSP,
+ CHECK_TYPE => 'message',
+ CHECK_POLICY => 'https://infra.apache.org/tools/csp.html',
+ CHECK_DOC => 'Websites must not replace the default Content-Security-Policy',
+ },
+
}
SITE_PASS = 'label-success'
@@ -208,6 +218,55 @@ module SiteStandards
end
return sites, crawl_time
end
+
+ CSP_INFRA_BASE = <<-EOD.strip.gsub(%r{([.*])}, "\\\\\\1")
+ 'self' data: blob: 'unsafe-inline' 'unsafe-eval'
+ https://www.apachecon.com/
+ https://www.communityovercode.org/
+ https://*.apache.org/ https://apache.org/
+ EOD
+ CSP_THIRD_PARTY = "https://*.scarf.sh/ ".gsub(%r{([.*])}, "\\\\\\1")
+
+ CSP_PROJECT_DOMAINS = "(.*)" # Allow anything here (capture it)
+
+ DEFAULT_CSP = <<-EOD.strip.gsub("\n",'').gsub(/ +/, ' ')
+ default-src #{CSP_INFRA_BASE} #{CSP_THIRD_PARTY} #{CSP_PROJECT_DOMAINS};
+ script-src #{CSP_INFRA_BASE} #{CSP_THIRD_PARTY} #{CSP_PROJECT_DOMAINS};
+ style-src #{CSP_INFRA_BASE} #{CSP_THIRD_PARTY} #{CSP_PROJECT_DOMAINS};
+ frame-ancestors 'self';
+ frame-src #{CSP_INFRA_BASE} #{CSP_THIRD_PARTY} #{CSP_PROJECT_DOMAINS};
+ worker-src 'self' data: blob:;
+ EOD
+
+ NON_PMC = <<-EOD.strip.gsub("\n",'').gsub(/ +/, ' ')
+ default-src 'self' data: 'unsafe-inline'
+ https://www.apachecon.com/ https://analytics.apache.org/ http://analytics.apache.org/
+ https://www.youtube-nocookie.com https://www.youtube.com;
+ script-src 'self' 'unsafe-inline' 'unsafe-eval'
+ https://www.apachecon.com/ https://analytics.apache.org/ http://analytics.apache.org/
+ https://www.youtube-nocookie.com https://www.youtube.com;
+ style-src 'self' 'unsafe-inline';
+ frame-ancestors 'none';
+ img-src 'self' data: https://www.apache.org/ https://www.apachecon.com/;
+ EOD
+
+ DEFAULT_CSP_RE = %r{^#{DEFAULT_CSP}$}
+
+ def CSP(site, key)
+ squashed = site&.gsub(/ +/, ' ') # data might be missing
+ return true if squashed == NON_PMC
+ if squashed =~ DEFAULT_CSP_RE
+ custom = $1
+ $stderr.puts [key, custom].inspect if custom.size > 1
+ return true
+ end
+ return false
+ end
+
+ def _validate(site, match, key)
+ return method(match).call(site, key) if match.is_a? Symbol
+ return site =~ match
+ end
# Analyze data returned from site-scan.rb by using checks[CHECK_VALIDATE] regex
# If value =~ CHECK_VALIDATE, SITE_PASS
@@ -216,11 +275,12 @@ module SiteStandards
# @param sites hash of site-scan data collected
# @param checks to apply to sites to determine status
# @return [overall counts, description of statuses, success listings]
+ # called by site_or_pod.rb
def analyze(sites, checks)
success = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }
counts = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }
checks.each do |nam, check_data|
- success[nam] = sites.select { |_, site| site[nam] =~ check_data[SiteStandards::CHECK_VALIDATE] }.keys
+ success[nam] = sites.select { |key, site| _validate(site[nam], check_data[SiteStandards::CHECK_VALIDATE], key) }.keys
counts[nam][SITE_PASS] = success[nam].count
counts[nam][SITE_WARN] = 0 # Reorder output
counts[nam][SITE_FAIL] = sites.select { |_, site| site[nam].nil? }.count
diff --git a/lib/whimsy/sitewebsite.rb b/lib/whimsy/sitewebsite.rb
index d5cea75b..4f6a3184 100644
--- a/lib/whimsy/sitewebsite.rb
+++ b/lib/whimsy/sitewebsite.rb
@@ -55,12 +55,19 @@ def display_project(project, links, analysis, checks, tlp = true)
_td do
if cls != SiteStandards::SITE_PASS
if checks.keys.include? col
- if checks[col][SiteStandards::CHECK_TYPE]
+ chktype = checks[col][SiteStandards::CHECK_TYPE]
+ if chktype == 'href'
_ 'URL expected to match regular expression: '
_code checks[col][SiteStandards::CHECK_VALIDATE].source
- else
+ elsif chktype == 'text'
_ 'Text of a link expected to match regular expression: '
_code checks[col][SiteStandards::CHECK_TEXT].source
+ elsif chktype == 'message'
+ _ 'Msg:'
+ _code checks[col][SiteStandards::CHECK_TEXT]
+ else
+ _ 'Unknown type:'
+ _code chktype
end
_br
_a checks[col][SiteStandards::CHECK_DOC], href: checks[col][SiteStandards::CHECK_POLICY]
@@ -91,12 +98,19 @@ def display_check(col, sites, analysis, checks, tlp = true)
end
_span.glyphicon.glyphicon_menu_right
if checks.keys.include? col
- if checks[col][SiteStandards::CHECK_TYPE]
+ chktype = checks[col][SiteStandards::CHECK_TYPE]
+ if chktype == 'href'
_ 'Check Results URL expected to match regular expression: '
_code checks[col][SiteStandards::CHECK_VALIDATE].source
- else
+ elsif chktype == 'text'
_ 'Check Results Text of a link expected to match regular expression: '
_code checks[col][SiteStandards::CHECK_TEXT].source
+ elsif chktype == 'message'
+ _ 'Msg:'
+ _code checks[col][SiteStandards::CHECK_TEXT]
+ else
+ _ 'Unknown type:'
+ _code chktype
end
if checks.include? col
_br