Package org.apache.manifoldcf.agents.interfaces

Examples of org.apache.manifoldcf.agents.interfaces.RepositoryDocument


              long startFetchTime = System.currentTimeMillis();
              String fileName = getFileCanonicalPath(file);
              if (fileName != null && !file.isHidden())
              {
                // Initialize repository document with common stuff, and find the URI
                RepositoryDocument rd = new RepositoryDocument();
                String uri = prepareForIndexing(rd,file,version);

                if (activities.checkURLIndexable(uri))
                {

                  // manipulate path to include the DFS alias, not the literal path
                  // String newPath = matchPrefix + fileName.substring(matchReplace.length());
                  String newPath = fileName;
                  if (checkNeedFileData(newPath, spec))
                  {
                    if (Logging.connectors.isDebugEnabled())
                      Logging.connectors.debug("JCIFS: Local file data needed for '"+documentIdentifier+"'");

                    // Create a temporary file, and use that for the check and then the ingest
                    File tempFile = File.createTempFile("_sdc_",null);
                    try
                    {
                      FileOutputStream os = new FileOutputStream(tempFile);
                      try
                      {

                        // Now, make a local copy so we can fingerprint
                        InputStream inputStream = getFileInputStream(file);
                        try
                        {
                          // Copy!
                          if (transferBuffer == null)
                            transferBuffer = new byte[65536];
                          while (true)
                          {
                            int amt = inputStream.read(transferBuffer,0,transferBuffer.length);
                            if (amt == -1)
                              break;
                            os.write(transferBuffer,0,amt);
                          }
                        }
                        finally
                        {
                          inputStream.close();
                        }
                      }
                      finally
                      {
                        os.close();
                      }

                      if (checkIngest(tempFile, newPath, spec, activities))
                      {
                        if (Logging.connectors.isDebugEnabled())
                          Logging.connectors.debug("JCIFS: Decided to ingest '"+documentIdentifier+"'");
                        // OK, do ingestion itself!
                        InputStream inputStream = new FileInputStream(tempFile);
                        try
                        {
                          rd.setBinary(inputStream, tempFile.length());
                         
                          activities.ingestDocument(documentIdentifier, version, uri, rd);
                        }
                        finally
                        {
                          inputStream.close();
                        }

                        // I put this record here deliberately for two reasons:
                        // (1) the other path includes ingestion time, and
                        // (2) if anything fails up to and during ingestion, I want THAT failure record to be written, not this one.
                        // So, really, ACTIVITY_ACCESS is a bit more than just fetch for JCIFS...
                        activities.recordActivity(new Long(startFetchTime),ACTIVITY_ACCESS,
                          new Long(tempFile.length()),documentIdentifier,"Success",null,null);

                      }
                      else
                      {
                        // We must actively remove the document here, because the getDocumentVersions()
                        // method has no way of signalling this, since it does not do the fingerprinting.
                        if (Logging.connectors.isDebugEnabled())
                          Logging.connectors.debug("JCIFS: Decided to remove '"+documentIdentifier+"'");
                        activities.deleteDocument(documentIdentifier, version);
                        // We should record the access here as well, since this is a non-exception way through the code path.
                        // (I noticed that this was not being recorded in the history while fixing 25477.)
                        activities.recordActivity(new Long(startFetchTime),ACTIVITY_ACCESS,
                          new Long(tempFile.length()),documentIdentifier,"Success",null,null);
                      }
                    }
                    finally
                    {
                      tempFile.delete();
                    }
                  }
                  else
                  {
                    if (Logging.connectors.isDebugEnabled())
                      Logging.connectors.debug("JCIFS: Local file data not needed for '"+documentIdentifier+"'");

                    // Presume that since the file was queued that it fulfilled the needed criteria.
                    // Go off and ingest the fast way.

                    // Ingest the document.
                    InputStream inputStream = getFileInputStream(file);
                    try
                    {
                      rd.setBinary(inputStream, fileLength(file));
                     
                      activities.ingestDocument(documentIdentifier, version, uri, rd);
                    }
                    finally
                    {
View Full Code Here


        Document document = (Document) cmisObject;
        long fileLength = document.getContentStreamLength();
        InputStream is = null;
       
        try {
          RepositoryDocument rd = new RepositoryDocument();
          Date createdDate = document.getCreationDate().getTime();
          Date modifiedDate = document.getLastModificationDate().getTime();
         
          rd.setFileName(document.getContentStreamFileName());
          rd.setMimeType(document.getContentStreamMimeType());
          rd.setCreatedDate(createdDate);
          rd.setModifiedDate(modifiedDate);
         
          //binary
          if(fileLength>0 && document.getContentStream()!=null){
            is = document.getContentStream().getStream();
            rd.setBinary(is, fileLength);
          }

          //properties
          List<Property<?>> properties = document.getProperties();
          String id = StringUtils.EMPTY;
          for (Property<?> property : properties) {
            String propertyId = property.getId();
            if (propertyId.endsWith(Constants.PARAM_OBJECT_ID))
              id = (String) property.getValue();

            if (property.getValue() !=null
                || property.getValues() != null) {
              PropertyType propertyType = property.getType();

              switch (propertyType) {

              case STRING:
              case ID:
              case URI:
              case HTML:
                if(property.isMultiValued()){
                  List<String> htmlPropertyValues = (List<String>) property.getValues();
                  for (String htmlPropertyValue : htmlPropertyValues) {
                    rd.addField(propertyId, htmlPropertyValue);
                  }
                } else {
                  String stringValue = (String) property.getValue();
                  if(StringUtils.isNotEmpty(stringValue)){
                    rd.addField(propertyId, stringValue);
                  }
                }
                break;
    
              case BOOLEAN:
                if(property.isMultiValued()){
                  List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
                  for (Boolean booleanPropertyValue : booleanPropertyValues) {
                    rd.addField(propertyId, booleanPropertyValue.toString());
                  }
                } else {
                  Boolean booleanValue = (Boolean) property.getValue();
                  if(booleanValue!=null){
                    rd.addField(propertyId, booleanValue.toString());
                  }
                }
                break;

              case INTEGER:
                if(property.isMultiValued()){
                  List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
                  for (BigInteger integerPropertyValue : integerPropertyValues) {
                    rd.addField(propertyId, integerPropertyValue.toString());
                  }
                } else {
                  BigInteger integerValue = (BigInteger) property.getValue();
                  if(integerValue!=null){
                    rd.addField(propertyId, integerValue.toString());
                  }
                }
                break;

              case DECIMAL:
                if(property.isMultiValued()){
                  List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
                  for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
                    rd.addField(propertyId, decimalPropertyValue.toString());
                  }
                } else {
                  BigDecimal decimalValue = (BigDecimal) property.getValue();
                  if(decimalValue!=null){
                    rd.addField(propertyId, decimalValue.toString());
                  }
                }
                break;

              case DATETIME:
                if(property.isMultiValued()){
                  List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
                  for (GregorianCalendar datePropertyValue : datePropertyValues) {
                    rd.addField(propertyId,
                        ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
                  }
                } else {
                  GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
                  if(dateValue!=null){
                    rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
                  }
                }
                break;

              default:
View Full Code Here

        }

        // It is a file to be indexed.
       
        // Prepare the metadata part of RepositoryDocument
        RepositoryDocument data = new RepositoryDocument();

        data.setFileName(fileStatus.getPath().getName());
        data.setMimeType(mapExtensionToMimeType(fileStatus.getPath().getName()));
        data.setModifiedDate(new Date(fileStatus.getModificationTime()));

        String uri;
        if (convertPath != null) {
          uri = convertToWGETURI(convertPath);
        } else {
          uri = fileStatus.getPath().toUri().toString();
        }
        data.addField("uri",uri);

        // We will record document fetch as an activity
        long startTime = System.currentTimeMillis();
        String errorCode = "FAILED";
        String errorDesc = StringUtils.EMPTY;
        long fileSize = 0;

        try {
          BackgroundStreamThread t = new BackgroundStreamThread(getSession(),new Path(documentIdentifier));
          try {
            t.start();
            boolean wasInterrupted = false;
            try {
              InputStream is = t.getSafeInputStream();
              try {
                data.setBinary(is, fileSize);
                activities.ingestDocument(documentIdentifier,version,uri,data);
              } finally {
                is.close();
              }
            } catch (java.net.SocketTimeoutException e) {
View Full Code Here

        // this is a content to ingest
        InputStream is = null;
        long fileLength = 0;
        try {
          //properties ingestion
          RepositoryDocument rd = new RepositoryDocument();     
          List<NamedValue> contentProperties = PropertiesUtils.getContentProperties(properties);
          PropertiesUtils.ingestProperties(rd, properties, contentProperties);

          // binaries ingestion - in Alfresco we could have more than one binary for each node (custom content models)
          for (NamedValue contentProperty : contentProperties) {
            //we are ingesting all the binaries defined as d:content property in the Alfresco content model
            Content binary = ContentReader.read(endpoint, username, password, socketTimeout, session, predicate, contentProperty.getName());
            fileLength = binary.getLength();
            is = ContentReader.getBinary(endpoint, binary, username, password, socketTimeout, session);
            rd.setBinary(is, fileLength);
           
            //id is the node reference only if the node has an unique content stream
            //For a node with a single d:content property: id = node reference
            String id = PropertiesUtils.getNodeReference(properties);
           
View Full Code Here

            if (index < version.length() && version.charAt(index++) == '+') {
              index = unpack(denyAclBuffer,version,index,'+');
            }

            //otherwise process
            RepositoryDocument rd = new RepositoryDocument();
             
            // Turn into acls and add into description
            String[] aclArray = new String[acls.size()];
            for (int j = 0; j < aclArray.length; j++) {
              aclArray[j] = (String)acls.get(j);
            }
            rd.setACL(aclArray);
            if (denyAclBuffer.length() > 0) {
              String[] denyAclArray = new String[]{denyAclBuffer.toString()};
              rd.setDenyACL(denyAclArray);
            }

            // Now do standard stuff
             
            String mimeType = "text/plain";
            Date createdDate = jiraFile.getCreatedDate();
            Date modifiedDate = jiraFile.getUpdatedDate();

            rd.setMimeType(mimeType);
            if (createdDate != null)
              rd.setCreatedDate(createdDate);
            if (modifiedDate != null)
              rd.setModifiedDate(modifiedDate);
           
            // Get general document metadata
            Map<String,String[]> metadataMap = jiraFile.getMetadata();
             
            for (Entry<String, String[]> entry : metadataMap.entrySet()) {
              rd.addField(entry.getKey(), entry.getValue());
            }

            String documentURI = jiraFile.getSelf();
            String document = getJiraBody(jiraFile);
            try {
              byte[] documentBytes = document.getBytes("UTF-8");
              InputStream is = new ByteArrayInputStream(documentBytes);
              try {
                rd.setBinary(is, documentBytes.length);
                activities.ingestDocument(nodeId, version, documentURI, rd);
                // No errors.  Record the fact that we made it.
                errorCode = "OK";
                fileSize = new Long(documentBytes.length);
              } finally {
View Full Code Here

            if (index < version.length() && version.charAt(index++) == '+') {
              index = unpack(denyAclBuffer,version,index,'+');
            }

            // content ingestion
            RepositoryDocument rd = new RepositoryDocument();

            // Turn into acls and add into description
            String[] aclArray = new String[acls.size()];
            for (int j = 0; j < aclArray.length; j++) {
              aclArray[j] = (String)acls.get(j);
            }
            rd.setACL(aclArray);
            if (denyAclBuffer.length() > 0) {
              String[] denyAclArray = new String[]{denyAclBuffer.toString()};
              rd.setDenyACL(denyAclArray);
            }

            // Length in bytes
            long fileLength = dropboxObject.bytes;
            //documentURI
            String documentURI = dropboxObject.path;

            if (dropboxObject.path != null)
              rd.setFileName(dropboxObject.path);
            if (dropboxObject.mimeType != null)
              rd.setMimeType(dropboxObject.mimeType);
            if (dropboxObject.modified != null)
              rd.setModifiedDate(com.dropbox.client2.RESTUtility.parseDate(dropboxObject.modified));
            // There doesn't appear to be a created date...
             
            rd.addField("Modified", dropboxObject.modified);
            rd.addField("Size", dropboxObject.size);
            rd.addField("Path", dropboxObject.path);
            rd.addField("Root", dropboxObject.root);
            rd.addField("ClientMtime", dropboxObject.clientMtime);
            rd.addField("mimeType", dropboxObject.mimeType);
            rd.addField("rev", dropboxObject.rev);
           
            getSession();
            BackgroundStreamThread t = new BackgroundStreamThread(nodeId);
            try {
              t.start();
              boolean wasInterrupted = false;
              try {
                InputStream is = t.getSafeInputStream();
                try {
                  rd.setBinary(is, fileLength);
                  activities.ingestDocument(nodeId, version, documentURI, rd);
                } finally {
                  is.close();
                }
              } catch (java.net.SocketTimeoutException e) {
View Full Code Here

        Document document = (Document) cmisObject;
        long fileLength = document.getContentStreamLength();
        InputStream is = null;
       
        try {
          RepositoryDocument rd = new RepositoryDocument();
          Date createdDate = document.getCreationDate().getTime();
          Date modifiedDate = document.getLastModificationDate().getTime();
         
          rd.setFileName(document.getContentStreamFileName());
          rd.setMimeType(document.getContentStreamMimeType());
          rd.setCreatedDate(createdDate);
          rd.setModifiedDate(modifiedDate);
         
          //binary
          if(fileLength>0 && document.getContentStream()!=null){
            is = document.getContentStream().getStream();
            rd.setBinary(is, fileLength);
          }

          //properties
          List<Property<?>> properties = document.getProperties();
          String id = StringUtils.EMPTY;
          for (Property<?> property : properties) {
            String propertyId = property.getId();
            if (propertyId.endsWith(Constants.PARAM_OBJECT_ID))
              id = (String) property.getValue();

            if (property.getValue() !=null
                || property.getValues() != null) {
              PropertyType propertyType = property.getType();

              switch (propertyType) {

              case STRING:
              case ID:
              case URI:
              case HTML:
                if(property.isMultiValued()){
                  List<String> htmlPropertyValues = (List<String>) property.getValues();
                  for (String htmlPropertyValue : htmlPropertyValues) {
                    rd.addField(propertyId, htmlPropertyValue);
                  }
                } else {
                  String stringValue = (String) property.getValue();
                  if(StringUtils.isNotEmpty(stringValue)){
                    rd.addField(propertyId, stringValue);
                  }
                }
                break;
    
              case BOOLEAN:
                if(property.isMultiValued()){
                  List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
                  for (Boolean booleanPropertyValue : booleanPropertyValues) {
                    rd.addField(propertyId, booleanPropertyValue.toString());
                  }
                } else {
                  Boolean booleanValue = (Boolean) property.getValue();
                  if(booleanValue!=null){
                    rd.addField(propertyId, booleanValue.toString());
                  }
                }
                break;

              case INTEGER:
                if(property.isMultiValued()){
                  List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
                  for (BigInteger integerPropertyValue : integerPropertyValues) {
                    rd.addField(propertyId, integerPropertyValue.toString());
                  }
                } else {
                  BigInteger integerValue = (BigInteger) property.getValue();
                  if(integerValue!=null){
                    rd.addField(propertyId, integerValue.toString());
                  }
                }
                break;

              case DECIMAL:
                if(property.isMultiValued()){
                  List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
                  for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
                    rd.addField(propertyId, decimalPropertyValue.toString());
                  }
                } else {
                  BigDecimal decimalValue = (BigDecimal) property.getValue();
                  if(decimalValue!=null){
                    rd.addField(propertyId, decimalValue.toString());
                  }
                }
                break;

              case DATETIME:
                if(property.isMultiValued()){
                  List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
                  for (GregorianCalendar datePropertyValue : datePropertyValues) {
                    rd.addField(propertyId,
                        ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
                  }
                } else {
                  GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
                  if(dateValue!=null){
                    rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
                  }
                }
                break;

              default:
View Full Code Here

        }

        // It is a file to be indexed.
       
        // Prepare the metadata part of RepositoryDocument
        RepositoryDocument data = new RepositoryDocument();

        data.setFileName(fileStatus.getPath().getName());
        data.setMimeType(mapExtensionToMimeType(fileStatus.getPath().getName()));
        data.setModifiedDate(new Date(fileStatus.getModificationTime()));

        String uri;
        if (convertPath != null) {
          uri = convertToWGETURI(convertPath);
        } else {
          uri = fileStatus.getPath().toUri().toString();
        }
        data.addField("uri",uri);

        // We will record document fetch as an activity
        long startTime = System.currentTimeMillis();
        String errorCode = "FAILED";
        String errorDesc = StringUtils.EMPTY;
        long fileSize = 0;

        try {
          BackgroundStreamThread t = new BackgroundStreamThread(getSession(),new Path(documentIdentifier));
          try {
            t.start();
            boolean wasInterrupted = false;
            try {
              InputStream is = t.getSafeInputStream();
              try {
                data.setBinary(is, fileSize);
                activities.ingestDocument(documentIdentifier,version,uri,data);
              } finally {
                is.close();
              }
            } catch (java.net.SocketTimeoutException e) {
View Full Code Here

          SearchMessagesThread smt = new SearchMessagesThread(session, folder, messageIDTerm);
          smt.start();
          Message[] message = smt.finishUp();

          for (Message msg : message) {
            RepositoryDocument rd = new RepositoryDocument();
            Date setDate = msg.getSentDate();
            rd.setFileName(msg.getFileName());
            is = msg.getInputStream();
            rd.setBinary(is, msg.getSize());
            String subject = StringUtils.EMPTY;
            for (String metadata : requiredMetadata) {
              if (metadata.toLowerCase().equals(EmailConfig.EMAIL_TO)) {
                Address[] to = msg.getRecipients(Message.RecipientType.TO);
                String[] toStr = new String[to.length];
                int j = 0;
                for (Address address : to) {
                  toStr[j] = address.toString();
                }
                rd.addField(EmailConfig.EMAIL_TO, toStr);
              } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_FROM)) {
                Address[] from = msg.getFrom();
                String[] fromStr = new String[from.length];
                int j = 0;
                for (Address address : from) {
                  fromStr[j] = address.toString();
                }
                rd.addField(EmailConfig.EMAIL_TO, fromStr);

              } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_SUBJECT)) {
                subject = msg.getSubject();
                rd.addField(EmailConfig.EMAIL_SUBJECT, subject);
              } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_BODY)) {
                Multipart mp = (Multipart) msg.getContent();
                for (int j = 0, n = mp.getCount(); i < n; i++) {
                  Part part = mp.getBodyPart(i);
                  String disposition = part.getDisposition();
                  if ((disposition == null)) {
                    MimeBodyPart mbp = (MimeBodyPart) part;
                    if (mbp.isMimeType(EmailConfig.MIMETYPE_TEXT_PLAIN)) {
                      rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString());
                    } else if (mbp.isMimeType(EmailConfig.MIMETYPE_HTML)) {
                      rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString()); //handle html accordingly. Returns content with html tags
                    }
                  }
                }
              } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_DATE)) {
                Date sentDate = msg.getSentDate();
                rd.addField(EmailConfig.EMAIL_DATE, sentDate.toString());
              } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_ATTACHMENT_ENCODING)) {
                Multipart mp = (Multipart) msg.getContent();
                if (mp != null) {
                  String[] encoding = new String[mp.getCount()];
                  for (int k = 0, n = mp.getCount(); i < n; i++) {
                    Part part = mp.getBodyPart(i);
                    String disposition = part.getDisposition();
                    if ((disposition != null) &&
                        ((disposition.equals(Part.ATTACHMENT) ||
                            (disposition.equals(Part.INLINE))))) {
                      encoding[k] = part.getFileName().split("\\?")[1];

                    }
                  }
                  rd.addField(EmailConfig.ENCODING_FIELD, encoding);
                }
              } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_ATTACHMENT_MIMETYPE)) {
                Multipart mp = (Multipart) msg.getContent();
                String[] MIMEType = new String[mp.getCount()];
                for (int k = 0, n = mp.getCount(); i < n; i++) {
                  Part part = mp.getBodyPart(i);
                  String disposition = part.getDisposition();
                  if ((disposition != null) &&
                      ((disposition.equals(Part.ATTACHMENT) ||
                          (disposition.equals(Part.INLINE))))) {
                    MIMEType[k] = part.getContentType();

                  }
                }
                rd.addField(EmailConfig.MIMETYPE_FIELD, MIMEType);
              }
            }
            String documentURI = makeDocumentURI(urlTemplate, folderName, id);
            activities.ingestDocument(id, version, documentURI, rd);
View Full Code Here

              if (index < version.length() && version.charAt(index++) == '+') {
                index = unpack(denyAclBuffer,version,index,'+');
              }

              //otherwise process
              RepositoryDocument rd = new RepositoryDocument();

              // Turn into acls and add into description
              String[] aclArray = new String[acls.size()];
              for (int j = 0; j < aclArray.length; j++) {
                aclArray[j] = (String)acls.get(j);
              }
              rd.setACL(aclArray);
              if (denyAclBuffer.length() > 0) {
                String[] denyAclArray = new String[]{denyAclBuffer.toString()};
                rd.setDenyACL(denyAclArray);
              }

              // Now do standard stuff
              String mimeType = googleFile.getMimeType();
              DateTime createdDate = googleFile.getCreatedDate();
              DateTime modifiedDate = googleFile.getModifiedDate();
              String extension = googleFile.getFileExtension();
              String title = googleFile.getTitle();
             
              if (mimeType != null)
                rd.setMimeType(mimeType);
              if (createdDate != null)
                rd.setCreatedDate(new Date(createdDate.getValue()));
              if (modifiedDate != null)
                rd.setModifiedDate(new Date(modifiedDate.getValue()));
              if (extension != null)
              {
                if (title == null)
                  title = "";
                rd.setFileName(title + "." + extension);
              }

              // Get general document metadata
              for (Entry<String, Object> entry : googleFile.entrySet()) {
                rd.addField(entry.getKey(), entry.getValue().toString());
              }

              // Fire up the document reading thread
              DocumentReadingThread t = new DocumentReadingThread(documentURI);
              try {
                t.start();
                boolean wasInterrupted = false;
                try {
                  InputStream is = t.getSafeInputStream();
                  try {
                    // Can only index while background thread is running!
                    rd.setBinary(is, fileLength);
                    activities.ingestDocument(nodeId, version, documentURI, rd);
                  } finally {
                    is.close();
                  }
                } catch (ManifoldCFException e) {
View Full Code Here

TOP

Related Classes of org.apache.manifoldcf.agents.interfaces.RepositoryDocument

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.