Hi all, I don't know if this can help somebody, but I've changed the process method of the LanguageIdentifierUpdateProcessor class in order to support multivalued fields, and it works pretty well:
protected SolrInputDocument process(SolrInputDocument doc) { String docLang = null; HashSet<String> docLangs = new HashSet<String>(); String fallbackLang = getFallbackLang(doc, fallbackFields, fallbackValue); if(langField == null || !doc.containsKey(langField) || (doc.containsKey(langField) && overwrite)) { String allText = concatFields(doc, inputFields); List<DetectedLanguage> languagelist = detectLanguage(allText); docLang = resolveLanguage(languagelist, fallbackLang); docLangs.add(docLang); log.debug("Detected main document language from fields " + inputFields.toString() + ": "+docLang); if(doc.containsKey(langField) && overwrite) { log.debug("Overwritten old value "+doc.getFieldValue(langField)); } if(langField != null && langField.length() != 0) { doc.setField(langField, docLang); } } else { // langField is set, we sanity check it against whitelist and fallback docLang = resolveLanguage((String) doc.getFieldValue(langField), fallbackLang); docLangs.add(docLang); log.debug("Field "+langField+" already contained value "+docLang+", not overwriting."); } if(enableMapping) { for (String fieldName : allMapFieldsSet) { if(doc.containsKey(fieldName)) { String fieldLang=""; if(mapIndividual && mapIndividualFieldsSet.contains(fieldName)) { Collection c = doc.getFieldValues(fieldName); for (Object o : c){ if(o instanceof String ){ List<DetectedLanguage> languagelist = detectLanguage((String) o); fieldLang = resolveLanguage(languagelist, docLang); docLangs.add(fieldLang); log.debug("Mapping multivalued field "+fieldName+" using individually detected language "+fieldLang); String mappedOutputField = getMappedField(fieldName, fieldLang); if (mappedOutputField != null) { log.debug("Mapping multivalued field {} to {}", doc.getFieldValue(docIdField), fieldLang); SolrInputField inField = new SolrInputField (fieldName); Collection currentContent =doc.getFieldValues(mappedOutputField); if (currentContent != null && currentContent.size()>0){ doc.addField(mappedOutputField, o); } 
else{ inField.setValue(o, doc.getField(fieldName).getBoost()); doc.setField(mappedOutputField, inField.getValue(), inField.getBoost()); } if(!mapKeepOrig) { log.debug("Removing old field {}", fieldName); doc.removeField(fieldName); } } else { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid output field mapping for " + fieldName + " field and language: " + fieldLang); } } } } else { fieldLang = docLang; log.debug("Mapping field "+fieldName+" using document global language "+fieldLang); String mappedOutputField = getMappedField(fieldName, fieldLang); if (mappedOutputField != null) { log.debug("Mapping field {} to {}", doc.getFieldValue(docIdField), fieldLang); SolrInputField inField = doc.getField(fieldName); doc.setField(mappedOutputField, inField.getValue(), inField.getBoost()); if(!mapKeepOrig) { log.debug("Removing old field {}", fieldName); doc.removeField(fieldName); } } else { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid output field mapping for " + fieldName + " field and language: " + fieldLang); } } } } } // Set the languages field to an array of all detected languages if(langsField != null && langsField.length() != 0) { doc.setField(langsField, docLangs.toArray()); } return doc; } -- View this message in context: http://lucene.472066.n3.nabble.com/Language-detection-for-multivalued-field-tp4096996p4157573.html Sent from the Solr - User mailing list archive at Nabble.com.