Revision: 28123
http://sourceforge.net/p/bibdesk/svn/28123
Author: hofman
Date: 2022-12-15 23:36:32 +0000 (Thu, 15 Dec 2022)
Log Message:
-----------
Rewrite SpringerLink web parser for current page format.
Modified Paths:
--------------
trunk/bibdesk/BDSKSpringerParser.m
Modified: trunk/bibdesk/BDSKSpringerParser.m
===================================================================
--- trunk/bibdesk/BDSKSpringerParser.m 2022-12-15 18:32:18 UTC (rev 28122)
+++ trunk/bibdesk/BDSKSpringerParser.m 2022-12-15 23:36:32 UTC (rev 28123)
@@ -40,6 +40,7 @@
#import "BibItem.h"
#import "NSError_BDSKExtensions.h"
#import "NSArray_BDSKExtensions.h"
+#import "NSURL_BDSKExtensions.h"
#import "DOMNode_BDSKExtensions.h"
#import "BDSKBibTeXParser.h"
#import "BDSKTypeManager.h"
@@ -55,123 +56,63 @@
@implementation BDSKSpringerParser
-+ (BibItem *)newItemFromDocument:(DOMDocument *)domDocument fromURL:(NSURL
*)url error:(NSError **)outError{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url {
+ if ([url hasDomain:@"link.springer.com"] == NO)
+ return NO;
- DOMNode *node = [domDocument documentElement];
- NSMutableDictionary *pubFields = [NSMutableDictionary dictionary];
- NSMutableArray *urlsArray = [NSMutableArray array];
-;
+ DOMNode *node = nil;
+ if ([url hasFirstPathComponent:@"article"])
+ node = [[domDocument documentElement]
singleNodeForXPath:@"./head/meta[@name='citation_doi']"];
+ else if ([url hasFirstPathComponent:@"search"])
+ node = [[domDocument documentElement]
singleNodeForXPath:@"./body//a[@class='title' and
starts-with(@href,'/article/')]"];
- NSString *pubType = BDSKMiscString;
- // set publication type
- NSString *pubTypeGuess = [[node
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']/div[@class='primary']/a/@title"] stringValue];
- if (pubTypeGuess != nil) {
- if ([pubTypeGuess isEqualToString:@"Link to the Book of this
Chapter"]) {
- pubType = BDSKChapterString;
- } else if ([pubTypeGuess isEqualToString:@"Link to the Journal of this
Article"]) {
- pubType = BDSKArticleString;
- } else {
- return nil;
- }
- }
+ return node != nil;
+}
+
+- (NSArray *)itemsReturningError:(NSError **)outError {
+ BOOL isArticle = [[self URL] hasFirstPathComponent:@"article"];
+ NSString *bibtexNodePath = nil;
+ if (isArticle)
+ bibtexNodePath = @"./head/meta[@name='citation_doi']";
+ else
+ bibtexNodePath = @"./body//a[@class='title' and
starts-with(@href,'/article/')]";
+ NSArray *bibtexNodes = [[[self domDocument] documentElement]
nodesForXPath:bibtexNodePath];
- // set title
- NSString *title = [[node
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading
primitive']/div[@class='text']/h1"] stringValue];
- if (title != nil)
- [pubFields setObject:title forKey:BDSKTitleString];
-
- // set book or journal
- if ([pubType isEqualToString:BDSKChapterString]) {
- NSString *chapter = [[node
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']/div[@class='primary']/a"] stringValue];
- if (chapter != nil)
- [pubFields setObject:chapter forKey:BDSKBooktitleString];
- } else if ([pubType isEqualToString:BDSKArticleString]) {
- NSString *journal = [[node
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']/div[@class='primary']/a"] stringValue];
- if (journal != nil)
- [pubFields setObject:journal forKey:BDSKJournalString];
+ for (DOMNode *bibtexNode in bibtexNodes) {
+ NSString *doi = nil;
+ if (isArticle)
+ doi = [bibtexNode stringValueOfAttribute:@"content"];
+ else
+ doi = [[bibtexNode stringValueOfAttribute:@"href"]
substringFromIndex:9];
+
+ NSString *bibtexURLString = [NSString
stringWithFormat:@"https://citation-needed.springer.com/v2/references/%@?format=bibtex&flavour=citation",
doi];
+ NSURL *bibtexURL = [NSURL URLWithString:bibtexURLString];
+
+ NSURLRequest *request = [NSURLRequest requestWithURL:bibtexURL];
+ NSDictionary *contextInfo = [NSDictionary
dictionaryWithObjectsAndKeys:doi, @"identifier", nil];
+
+ [self addDownloadWithRequest:request contextInfo:contextInfo];
}
- // set DOI and store for later use
- NSString *doi = [[node
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']//span[@class='doi']/span[@class='value']"] stringValue];
- if (doi != nil)
- [pubFields setObject:doi forKey:BDSKDoiString];
+ return nil;
+}
- // set pages
- NSString *pages = [[node
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']//span[@class='pagination']"] stringValue];
- if (pages != nil) {
- AGRegex *pagesRegex = [AGRegex
regexWithPattern:@"^([0-9]*)-([0-9]*)?"];
- AGRegexMatch *match = [pagesRegex findInString:pages];
- if ([match count] == 3) {
- NSMutableString *page = [[match groupAtIndex:1] mutableCopy];
- NSString *endPage = [match groupAtIndex:2];
- [page appendString:@"--"];
- if([page length] - 2 > [endPage length])
- [page appendString:[page substringToIndex:[page length] -
[endPage length] - 2]];
- [page appendString:endPage];
- [pubFields setObject:page forKey:BDSKPagesString];
- [page release];
- } else {
- [pubFields setObject:pages forKey:BDSKPagesString];
- }
- }
- // set authors
- NSString *authors = [[[node
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading
primitive']/div[@class='text']/p[@class='authors']/a"]
valueForKey:@"stringValue"] componentsJoinedByAnd];
- if (authors != nil)
- [pubFields setValue:authors forKey:BDSKAuthorString];
- // set editors
- NSString *editors = [[[node
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading
primitive']/div[@class='text']/p[@class='editors']/a"]
valueForKey:@"stringValue"] componentsJoinedByAnd];
- if (editors != nil)
- [pubFields setValue:editors forKey:BDSKEditorString];
- // set series
- if ([pubType isEqualToString:BDSKChapterString]) {
- NSString *series = [[node
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']/div[@class='secondary']/a"] stringValue];
- if (series != nil)
- [pubFields setObject:series forKey:BDSKSeriesString];
- }
+- (NSArray *)itemsFromDownload:(BDSKCitationDownload *)download error:(NSError
**)outError {
+ NSArray *items = [super itemsFromDownload:download error:outError];
- // volume, number, and year
- NSString *vyString = [[node
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading
enumeration']/div[@class='secondary']"] stringValue];
- if (vyString != nil) {
- // parse volume number
- AGRegex *volRegex = [AGRegex regexWithPattern:@"Volume
([0-9]*)[^0-9]"];
- AGRegexMatch *volMatch = [volRegex findInString:vyString];
- // set volume
- if (nil != [volMatch groupAtIndex:1]) {
- [pubFields setValue:[volMatch groupAtIndex:1]
forKey:BDSKVolumeString];
- }
- // parse issue number
- AGRegex *numRegex = [AGRegex regexWithPattern:@"Number
([0-9]*)[^0-9]"];
- AGRegexMatch *numMatch = [numRegex findInString:vyString];
- // set number
- if (nil != [numMatch groupAtIndex:1]) {
- [pubFields setValue:[numMatch groupAtIndex:1]
forKey:BDSKNumberString];
- }
- // parse year
- AGRegex *yearRegex = [AGRegex
regexWithPattern:@"[^0-9]([12][0-9][0-9][0-9])[^0-9]"];
- AGRegexMatch *yearMatch = [yearRegex findInString:vyString];
- // set year
- if (nil != [yearMatch groupAtIndex:1]) {
- // only if it appears before the string DOI to avoid confusing
parts of the DOI as the year
- if ([vyString rangeOfString:[yearMatch groupAtIndex:1]].location <
[vyString rangeOfString:@"DOI"].location) {
- [pubFields setValue:[yearMatch groupAtIndex:1]
forKey:BDSKYearString];
- }
- }
+ BibItem *item = [items firstObject];
+ NSString *doi = [[download contextInfo] objectForKey:@"identifier"];
+
+ if (item && doi) {
+ [item setField:BDSKUrlString toValue:nil];
+ [item addURLString:[NSString
stringWithFormat:@"https://link.springer.com/content/pdf/%@.pdf", doi]];
+ if ([BDSKDoiString isRemoteURLField])
+ [item addURLString:[@"https://doi.org/"
stringByAppendingString:[doi stringByAddingPercentEscapesForPath]]];
}
-
- // URL to PDF
- [urlsArray addObject:[[NSURL URLWithString:@"fulltext.pdf"
relativeToURL:url] absoluteString]];
- if ([BDSKDoiString isRemoteURLField]) {
- if ([doi containsString:@"://"] == NO)
- doi = [@"https://doi.org/" stringByAppendingString:[doi
stringByAddingPercentEscapesForPath]];
- [urlsArray addObject:doi];
- }
- return [[BibItem alloc] initWithType:pubType citeKey:nil
pubFields:pubFields URLStrings:urlsArray];
-
+ return items;
}
-+ (NSString *)citationNodeXPath { return @"./body//a[@data-gtmlabel='BIB']"; }
-
+ (NSString *)name {return @"SpringerLink"; }
+ (NSString *)address { return @"https://link.springer.com/"; }
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
_______________________________________________
Bibdesk-commit mailing list
Bibdesk-commit@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit