chrismattmann closed pull request #153: This is the code for breaking dratstat URL: https://github.com/apache/drat/pull/153
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/distribution/src/main/resources/bin/dratstats.py
b/distribution/src/main/resources/bin/dratstats.py
index cae5c897..43ff1c86 100644
--- a/distribution/src/main/resources/bin/dratstats.py
+++ b/distribution/src/main/resources/bin/dratstats.py
@@ -268,7 +268,7 @@ def run(repos_list, output_dir):
print("\nOODT Started: OK\n")
print('Adding repository: '+str(rep)+' to Solr')
- index_solr(json.dumps([rep]))
+ # index_solr(json.dumps([rep]))
print("\nRunning DRAT on " + rep["repo"] + " ...\n")
@@ -295,178 +295,9 @@ def run(repos_list, output_dir):
wait_for_job("urn:drat:MimePartitioner")
wait_for_job("urn:drat:RatCodeAudit")
stats['map_end'] = current_datetime()
-
- if retval:
- time.sleep(5)
- stats['reduce_start'] =
current_datetime()
-
- # Extract data from
RatAggregate File
- totalNotes = 0
- totalBinaries = 0
- totalArchives = 0
- totalStandards = 0
- totalApache = 0
- totalGenerated = 0
- totalUnknown = 0
-
- rat_dir =
os.getenv("DRAT_HOME") + "/data/archive/rat"
-
- # Iterate over all RAT log
files
- for root, dirs, files in
os.walk(rat_dir):
- for filename in files:
- if
filename.endswith(".log"):
- (notes,
binaries, archives,standards,apachelicensed,generated,unknown) =
parseFile(os.path.join(root, filename))
-
totalNotes = totalNotes + notes
-
totalBinaries = totalBinaries + binaries
-
totalArchives = totalArchives + archives
-
totalStandards = totalStandards + standards
-
totalApache = totalApache + apachelicensed
-
totalGenerated = totalGenerated + generated
-
totalUnknown = totalUnknown + unknown
-
- stats["license_Notes"] =
totalNotes
- stats["license_Binaries"] =
totalBinaries
- stats["license_Archives"] =
totalArchives
- stats["license_Standards"] =
totalStandards
- stats["license_Apache"] =
totalApache
- stats["license_Generated"] =
totalGenerated
- stats["license_Unknown"] =
totalUnknown
-
- stats['reduce_end'] =
current_datetime()
- print "\nDRAT Scan Completed:
OK\n"
-
- time.sleep(5)
-
- if retval:
- # Copy Data with datetime variables above,
extract output from RatAggregate file, extract data from Solr Core
- printnow ("\nCopying data to Solr and Output
Directory...\n")
-
- # Extract data from Solr
- neg_mimetype = ["image", "application", "text",
"video", "audio", "message", "multipart"]
- connection =
urllib2.urlopen(os.getenv("SOLR_URL") +
"/drat/select?q=*%3A*&rows=0&facet=true&facet.field=mimetype&wt=python&indent=true")
-
- response = eval(connection.read())
- mime_count =
response["facet_counts"]["facet_fields"]["mimetype"]
-
- for i in range(0, len(mime_count), 2):
- if mime_count[i].split("/")[0] not in
neg_mimetype:
- stats["mime_" + mime_count[i]]
= mime_count[i + 1]
-
-
- # Count the number of files
- stats["files"] = count_num_files(rep["repo"],
".git")
-
- # Write data into Solr
- stats["type"] = 'software'
- stats_data = []
- stats_data.append(stats)
- json_data = json.dumps(stats_data)
- index_solr(json_data)
-
- # Parse RAT logs
- rat_logs_dir = os.getenv("DRAT_HOME") +
"/data/archive/rat/*/*.log"
- rat_license = {}
- rat_header = {}
- for filename in glob.glob(rat_logs_dir):
- #print('=' * 20)
- l = 0
- h = 0
- cur_file = ''
- cur_header = ''
- cur_section = ''
- parsedHeaders = False
- parsedLicenses = False
-
- with open(filename, 'rb') as f:
- printnow('Parsing rat log:
['+filename+']')
- for line in f:
- if
'*****************************************************' in line:
- l = 0
- h = 0
- if cur_section
== 'licenses':
-
parsedLicenses = True
- if cur_section
== 'headers':
-
parsedHeaders = True
-
- cur_file = ''
- cur_header = ''
- cur_section = ''
- if line.startswith('
Files with Apache') and not parsedLicenses:
- cur_section =
'licenses'
- if line.startswith('
Printing headers for ') and not parsedHeaders:
- cur_section =
'headers'
- if cur_section ==
'licenses':
- l += 1
- if l > 4:
- line =
line.strip()
- if line:
-
print("File: %s with License Line: %s" % (filename, line))
-
li = parse_license(line)
-
rat_license[li[0]] = li[1]
-
print(li)
- if cur_section ==
'headers':
- if
'=====================================================' in line or '== File:'
in line:
- h += 1
- if h == 2:
-
cur_file = line.split("/")[-1].strip()
- if h == 3:
-
cur_header += line
- if h == 4:
-
rat_header[cur_file] = cur_header.split("\n", 1)[1]
-
cur_file = ''
-
cur_header = ''
- h = 1
- if h == 3:
- rat_header[cur_file] =
cur_header.split("\n", 1)[1]
- parsedHeaders = True
- parsedLicenses = True
-
- # Index RAT logs into Solr
- connection =
urllib2.urlopen(os.getenv("SOLR_URL") +
-
"/drat/select?q=*%3A*&fl=filename%2Cfilelocation%2Cmimetype&wt=python&rows="
-
+ str(stats["files"]) +"&indent=true")
- response = eval(connection.read())
- docs = response['response']['docs']
- file_data = []
- batch = 100
- dc = 0
-
- for doc in docs:
- fdata = {}
- fdata['id'] =
os.path.join(doc['filelocation'][0], doc['filename'][0])
- m = md5.new()
- m.update(fdata['id'])
- hashId = m.hexdigest()
- fileId = hashId+"-"+doc['filename'][0]
-
- if fileId not in rat_license:
- print "File:
"+str(fdata['id'])+": ID: ["+fileId+"] not present in parsed licenses => Likely
file copying issue. Skipping."
- continue #handle issue with
DRAT #93
-
- fdata["type"] = 'file'
- fdata['parent'] = rep["repo"]
- fdata['mimetype'] = doc['mimetype'][0]
- fdata['license'] = rat_license[fileId]
- if fileId in rat_header:
- fdata['header'] =
rat_header[fileId]
- file_data.append(fdata)
- dc += 1
- if dc % batch == 0:
- json_data =
json.dumps(file_data)
- index_solr(json_data)
- file_data = []
- if dc % batch != 0:
- json_data = json.dumps(file_data)
- index_solr(json_data)
-
- # Copying data to Output Directory
- repos_out = output_dir + "/" +
normalize_path(rep["repo"])
- shutil.copytree(os.getenv("DRAT_HOME") +
"/data", repos_out)
- print("\nData copied to Solr and Output
Directory: OK\n")
-
- else:
- print ("\nDRAT Scan Completed: Resulted in
Error\n")
-
+ print ("\nwaiting for Rat
Aggregator...\n")
+ wait_for_job("urn:drat:RatAggregator")
+
time.sleep(5)
print ("\nStopping OODT...\n")
diff --git a/nohup.out b/nohup.out
new file mode 100644
index 00000000..bb32b5eb
--- /dev/null
+++ b/nohup.out
@@ -0,0 +1,3 @@
+Started dynamic workflow with id '6453cca6-9f30-11e8-b99d-f5018c8e9233'
+
+Navigate to http://localhost:8080/opsui/ to view the OODT browser and
http://localhost:8080/solr to view the Solr catalog.
diff --git
a/webapps/proteus-new/src/main/webapp/resources/src/components/statisticscomp.vue
b/webapps/proteus-new/src/main/webapp/resources/src/components/statisticscomp.vue
index 1280e9ce..adebf8b2 100644
---
a/webapps/proteus-new/src/main/webapp/resources/src/components/statisticscomp.vue
+++
b/webapps/proteus-new/src/main/webapp/resources/src/components/statisticscomp.vue
@@ -156,7 +156,7 @@ the License.
return this.stat.crawledfiles/this.stat.numOfFiles *100;
},
indexingprogress(){
- return this.stat.indexedfiles/this.stat.numberOfFiles * 100;
+ return this.stat.indexedfiles/this.stat.numOfFiles * 100;
}
}
}
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
