Revision: 28123
          http://sourceforge.net/p/bibdesk/svn/28123
Author:   hofman
Date:     2022-12-15 23:36:32 +0000 (Thu, 15 Dec 2022)
Log Message:
-----------
Rewrite SpringerLink web parser for current page format.

Modified Paths:
--------------
    trunk/bibdesk/BDSKSpringerParser.m

Modified: trunk/bibdesk/BDSKSpringerParser.m
===================================================================
--- trunk/bibdesk/BDSKSpringerParser.m  2022-12-15 18:32:18 UTC (rev 28122)
+++ trunk/bibdesk/BDSKSpringerParser.m  2022-12-15 23:36:32 UTC (rev 28123)
@@ -40,6 +40,7 @@
 #import "BibItem.h"
 #import "NSError_BDSKExtensions.h"
 #import "NSArray_BDSKExtensions.h"
+#import "NSURL_BDSKExtensions.h"
 #import "DOMNode_BDSKExtensions.h"
 #import "BDSKBibTeXParser.h"
 #import "BDSKTypeManager.h"
@@ -55,123 +56,63 @@
 
 @implementation BDSKSpringerParser
 
-+ (BibItem *)newItemFromDocument:(DOMDocument *)domDocument fromURL:(NSURL 
*)url error:(NSError **)outError{
++ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url {
+    if ([url hasDomain:@"link.springer.com"] == NO)
+        return NO;
     
-       DOMNode *node = [domDocument documentElement];
-       NSMutableDictionary *pubFields = [NSMutableDictionary dictionary];
-       NSMutableArray *urlsArray = [NSMutableArray array];
-;
+    DOMNode *node = nil;
+    if ([url hasFirstPathComponent:@"article"])
+        node = [[domDocument documentElement] 
singleNodeForXPath:@"./head/meta[@name='citation_doi']"];
+    else if ([url hasFirstPathComponent:@"search"])
+        node = [[domDocument documentElement] 
singleNodeForXPath:@"./body//a[@class='title' and 
starts-with(@href,'/article/')]"];
     
-    NSString *pubType = BDSKMiscString;
-    // set publication type
-    NSString *pubTypeGuess = [[node 
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']/div[@class='primary']/a/@title"] stringValue];
-    if (pubTypeGuess != nil) {
-        if ([pubTypeGuess isEqualToString:@"Link to the Book of this 
Chapter"]) {
-            pubType = BDSKChapterString;
-        } else if ([pubTypeGuess isEqualToString:@"Link to the Journal of this 
Article"]) {
-            pubType = BDSKArticleString;
-        } else {
-            return nil;
-        }
-    }
+    return node != nil;
+}
+
+- (NSArray *)itemsReturningError:(NSError **)outError {
+    BOOL isArticle = [[self URL] hasFirstPathComponent:@"article"];
+    NSString *bibtexNodePath = nil;
+    if (isArticle)
+        bibtexNodePath = @"./head/meta[@name='citation_doi']";
+    else
+        bibtexNodePath = @"./body//a[@class='title' and 
starts-with(@href,'/article/')]";
+    NSArray *bibtexNodes = [[[self domDocument] documentElement] 
nodesForXPath:bibtexNodePath];
     
-       // set title
-    NSString *title = [[node 
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
primitive']/div[@class='text']/h1"] stringValue];
-    if (title != nil)
-        [pubFields setObject:title forKey:BDSKTitleString];
-       
-    // set book or journal
-    if ([pubType isEqualToString:BDSKChapterString]) {
-        NSString *chapter = [[node 
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']/div[@class='primary']/a"] stringValue];
-        if (chapter != nil)
-            [pubFields setObject:chapter forKey:BDSKBooktitleString];
-    } else if ([pubType isEqualToString:BDSKArticleString]) {
-        NSString *journal = [[node 
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']/div[@class='primary']/a"] stringValue];
-        if (journal != nil)
-            [pubFields setObject:journal forKey:BDSKJournalString];
+    for (DOMNode *bibtexNode in bibtexNodes) {
+        NSString *doi = nil;
+        if (isArticle)
+            doi = [bibtexNode stringValueOfAttribute:@"content"];
+        else
+            doi = [[bibtexNode stringValueOfAttribute:@"href"] 
substringFromIndex:9];
+        
+        NSString *bibtexURLString = [NSString 
stringWithFormat:@"https://citation-needed.springer.com/v2/references/%@?format=bibtex&flavour=citation",
 doi];
+        NSURL *bibtexURL = [NSURL URLWithString:bibtexURLString];
+        
+        NSURLRequest *request = [NSURLRequest requestWithURL:bibtexURL];
+        NSDictionary *contextInfo = [NSDictionary 
dictionaryWithObjectsAndKeys:doi, @"identifier", nil];
+        
+        [self addDownloadWithRequest:request contextInfo:contextInfo];
     }
     
-       // set DOI and store for later use
-    NSString *doi = [[node 
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']//span[@class='doi']/span[@class='value']"] stringValue];
-    if (doi != nil)
-        [pubFields setObject:doi forKey:BDSKDoiString];
+    return nil;
+}
 
-       // set pages
-    NSString *pages = [[node 
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']//span[@class='pagination']"] stringValue];
-    if (pages != nil) {
-        AGRegex *pagesRegex = [AGRegex 
regexWithPattern:@"^([0-9]*)-([0-9]*)?"];
-        AGRegexMatch *match = [pagesRegex findInString:pages];
-        if ([match count] == 3) {
-            NSMutableString *page = [[match groupAtIndex:1] mutableCopy];
-            NSString *endPage = [match groupAtIndex:2];
-            [page appendString:@"--"];
-            if([page length] - 2 > [endPage length])
-                [page appendString:[page substringToIndex:[page length] - 
[endPage length] - 2]];
-            [page appendString:endPage];
-            [pubFields setObject:page forKey:BDSKPagesString];
-            [page release];
-        } else {
-            [pubFields setObject:pages forKey:BDSKPagesString];
-        }
-    }
-       // set authors
-    NSString *authors = [[[node 
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
primitive']/div[@class='text']/p[@class='authors']/a"] 
valueForKey:@"stringValue"] componentsJoinedByAnd];
-    if (authors != nil)
-        [pubFields setValue:authors forKey:BDSKAuthorString];
-       // set editors
-    NSString *editors = [[[node 
nodesForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
primitive']/div[@class='text']/p[@class='editors']/a"] 
valueForKey:@"stringValue"] componentsJoinedByAnd];
-    if (editors != nil)
-        [pubFields setValue:editors forKey:BDSKEditorString];
-       // set series
-    if ([pubType isEqualToString:BDSKChapterString]) {
-        NSString *series = [[node 
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']/div[@class='secondary']/a"] stringValue];
-        if (series != nil)
-            [pubFields setObject:series forKey:BDSKSeriesString];
-    }
+- (NSArray *)itemsFromDownload:(BDSKCitationDownload *)download error:(NSError 
**)outError {
+    NSArray *items = [super itemsFromDownload:download error:outError];
     
-    // volume, number, and year
-    NSString *vyString = [[node 
singleNodeForXPath:@".//div[@id='ContentHeading']/div[@class='heading 
enumeration']/div[@class='secondary']"] stringValue];
-    if (vyString != nil) {
-        // parse volume number
-               AGRegex *volRegex = [AGRegex regexWithPattern:@"Volume 
([0-9]*)[^0-9]"];
-               AGRegexMatch *volMatch = [volRegex findInString:vyString];
-               // set volume
-               if (nil != [volMatch groupAtIndex:1]) {
-                       [pubFields setValue:[volMatch groupAtIndex:1] 
forKey:BDSKVolumeString];
-               }
-        // parse issue number
-               AGRegex *numRegex = [AGRegex regexWithPattern:@"Number 
([0-9]*)[^0-9]"];
-               AGRegexMatch *numMatch = [numRegex findInString:vyString];
-               // set number
-               if (nil != [numMatch groupAtIndex:1]) {
-                       [pubFields setValue:[numMatch groupAtIndex:1] 
forKey:BDSKNumberString];
-               }
-        // parse year
-               AGRegex *yearRegex = [AGRegex 
regexWithPattern:@"[^0-9]([12][0-9][0-9][0-9])[^0-9]"];
-               AGRegexMatch *yearMatch = [yearRegex findInString:vyString];
-               // set year
-               if (nil != [yearMatch groupAtIndex:1]) {
-            // only if it appears before the string DOI to avoid confusing 
parts of the DOI as the year
-            if ([vyString rangeOfString:[yearMatch groupAtIndex:1]].location < 
[vyString rangeOfString:@"DOI"].location) {
-                [pubFields setValue:[yearMatch groupAtIndex:1] 
forKey:BDSKYearString];
-            }
-               }
+    BibItem *item = [items firstObject];
+    NSString *doi = [[download contextInfo] objectForKey:@"identifier"];
+    
+    if (item && doi) {
+        [item setField:BDSKUrlString toValue:nil];
+        [item addURLString:[NSString 
stringWithFormat:@"https://link.springer.com/content/pdf/%@.pdf", doi]];
+        if ([BDSKDoiString isRemoteURLField])
+            [item addURLString:[@"https://doi.org/" 
stringByAppendingString:[doi stringByAddingPercentEscapesForPath]]];
     }
-       
-    // URL to PDF
-    [urlsArray addObject:[[NSURL URLWithString:@"fulltext.pdf" 
relativeToURL:url] absoluteString]];
-    if ([BDSKDoiString isRemoteURLField]) {
-        if ([doi containsString:@"://"] == NO)
-            doi = [@"https://doi.org/" stringByAppendingString:[doi 
stringByAddingPercentEscapesForPath]];
-        [urlsArray addObject:doi];
-    }
     
-       return [[BibItem alloc] initWithType:pubType citeKey:nil 
pubFields:pubFields URLStrings:urlsArray];
-    
+    return items;
 }
 
-+ (NSString *)citationNodeXPath { return @"./body//a[@data-gtmlabel='BIB']"; }
-
 + (NSString *)name {return @"SpringerLink"; }
 
 + (NSString *)address { return @"https://link.springer.com/"; }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.



_______________________________________________
Bibdesk-commit mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/bibdesk-commit

Reply via email to