Package org.apache.manifoldcf.agents.interfaces

Examples of org.apache.manifoldcf.agents.interfaces.RepositoryDocument


                        Logging.connectors.debug("JCIFS: Decided to ingest '"+documentIdentifier+"'");
                      // OK, do ingestion itself!
                      InputStream inputStream = new FileInputStream(tempFile);
                      try
                      {
                        RepositoryDocument rd = new RepositoryDocument();
                        rd.setBinary(inputStream, tempFile.length());
                        rd.setFileName(file.getName());
                        String contentType = mapExtensionToMimeType(file.getName());
                        if (contentType != null)
                          rd.setMimeType(contentType);
                        rd.addField("lastModified", new Date(file.lastModified()).toString());
                        int index = 0;
                        index = setDocumentSecurity(rd,version,index);
                        index = setPathMetadata(rd,version,index);
                        StringBuilder ingestURI = new StringBuilder();
                        index = unpack(ingestURI,version,index,'+');
                        activities.ingestDocument(documentIdentifier, version, ingestURI.toString(), rd);
                      }
                      finally
                      {
                        inputStream.close();
                      }

                      // I put this record here deliberately for two reasons:
                      // (1) the other path includes ingestion time, and
                      // (2) if anything fails up to and during ingestion, I want THAT failure record to be written, not this one.
                      // So, really, ACTIVITY_ACCESS is a bit more than just fetch for JCIFS...
                      activities.recordActivity(new Long(startFetchTime),ACTIVITY_ACCESS,
                        new Long(tempFile.length()),documentIdentifier,"Success",null,null);

                    }
                    else
                    {
                      // We must actively remove the document here, because the getDocumentVersions()
                      // method has no way of signalling this, since it does not do the fingerprinting.
                      if (Logging.connectors.isDebugEnabled())
                        Logging.connectors.debug("JCIFS: Decided to remove '"+documentIdentifier+"'");
                      activities.deleteDocument(documentIdentifier, version);
                      // We should record the access here as well, since this is a non-exception way through the code path.
                      // (I noticed that this was not being recorded in the history while fixing 25477.)
                      activities.recordActivity(new Long(startFetchTime),ACTIVITY_ACCESS,
                        new Long(tempFile.length()),documentIdentifier,"Success",null,null);
                    }
                  }
                  finally
                  {
                    tempFile.delete();
                  }
                }
                else
                {
                  if (Logging.connectors.isDebugEnabled())
                    Logging.connectors.debug("JCIFS: Local file data not needed for '"+documentIdentifier+"'");

                  // Presume that since the file was queued that it fulfilled the needed criteria.
                  // Go off and ingest the fast way.

                  // Ingest the document.
                  InputStream inputStream = getFileInputStream(file);
                  try
                  {
                    RepositoryDocument rd = new RepositoryDocument();
                    rd.setBinary(inputStream, fileLength(file));
                    rd.setFileName(file.getName());
                    String contentType = mapExtensionToMimeType(file.getName());
                    if (contentType != null)
                      rd.setMimeType(contentType);
                    rd.addField("lastModified", new Date(file.lastModified()).toString());
                    int index = 0;
                    index = setDocumentSecurity(rd,version,index);
                    index = setPathMetadata(rd,version,index);
                    StringBuilder ingestURI = new StringBuilder();
                    index = unpack(ingestURI,version,index,'+');
View Full Code Here


        long fileLength = document.getContentStreamLength();

        InputStream is = document.getContentStream().getStream();

        try {
          RepositoryDocument rd = new RepositoryDocument();
         
          //binary
          rd.setBinary(is, fileLength);

          //properties
          List<Property<?>> properties = document.getProperties();
          String id = StringUtils.EMPTY;
          for (Property<?> property : properties) {
            String propertyId = property.getId();
            Object propertyValue = property.getValue();
            if (propertyId.endsWith(Constants.PARAM_OBJECT_ID))
              id = (String) propertyValue;

            if (propertyValue != null) {
              PropertyType propertyType = property.getType();

              switch (propertyType) {

              case STRING:
              case ID:
              case URI:
              case HTML:
                String stringValue = (String) propertyValue;
                rd.addField(propertyId, stringValue);
                break;

              case BOOLEAN:
                Boolean booleanValue = (Boolean) propertyValue;
                rd.addField(propertyId, booleanValue.toString());
                break;

              case INTEGER:
                BigInteger integerValue = (BigInteger) propertyValue;
                rd.addField(propertyId, integerValue.toString());
                break;

              case DECIMAL:
                BigDecimal decimalValue = (BigDecimal) propertyValue;
                rd.addField(propertyId, decimalValue.toString());
                break;

              case DATETIME:
                GregorianCalendar dateValue = (GregorianCalendar) propertyValue;
                rd.addField(propertyId,
                    ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
                break;

              default:
                break;
View Full Code Here

        Document document = (Document) cmisObject;
        long fileLength = document.getContentStreamLength();
        InputStream is = null;
       
        try {
          RepositoryDocument rd = new RepositoryDocument();
         
          //binary
          if(fileLength>0 && document.getContentStream()!=null){
            is = document.getContentStream().getStream();
            rd.setBinary(is, fileLength);
          }

          //properties
          List<Property<?>> properties = document.getProperties();
          String id = StringUtils.EMPTY;
          for (Property<?> property : properties) {
            String propertyId = property.getId();
            if (propertyId.endsWith(Constants.PARAM_OBJECT_ID))
              id = (String) property.getValue();

            if (property.getValue() !=null
                || property.getValues() != null) {
              PropertyType propertyType = property.getType();

              switch (propertyType) {

              case STRING:
              case ID:
              case URI:
              case HTML:
                if(property.isMultiValued()){
                  List<String> htmlPropertyValues = (List<String>) property.getValues();
                  for (String htmlPropertyValue : htmlPropertyValues) {
                    rd.addField(propertyId, htmlPropertyValue);
                  }
                } else {
                  String stringValue = (String) property.getValue();
                  if(StringUtils.isNotEmpty(stringValue)){
                    rd.addField(propertyId, stringValue);
                  }
                }
                break;
    
              case BOOLEAN:
                if(property.isMultiValued()){
                  List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
                  for (Boolean booleanPropertyValue : booleanPropertyValues) {
                    rd.addField(propertyId, booleanPropertyValue.toString());
                  }
                } else {
                  Boolean booleanValue = (Boolean) property.getValue();
                  if(booleanValue!=null){
                    rd.addField(propertyId, booleanValue.toString());
                  }
                }
                break;

              case INTEGER:
                if(property.isMultiValued()){
                  List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
                  for (BigInteger integerPropertyValue : integerPropertyValues) {
                    rd.addField(propertyId, integerPropertyValue.toString());
                  }
                } else {
                  BigInteger integerValue = (BigInteger) property.getValue();
                  if(integerValue!=null){
                    rd.addField(propertyId, integerValue.toString());
                  }
                }
                break;

              case DECIMAL:
                if(property.isMultiValued()){
                  List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
                  for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
                    rd.addField(propertyId, decimalPropertyValue.toString());
                  }
                } else {
                  BigDecimal decimalValue = (BigDecimal) property.getValue();
                  if(decimalValue!=null){
                    rd.addField(propertyId, decimalValue.toString());
                  }
                }
                break;

              case DATETIME:
                if(property.isMultiValued()){
                  List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
                  for (GregorianCalendar datePropertyValue : datePropertyValues) {
                    rd.addField(propertyId,
                        ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
                  }
                } else {
                  GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
                  if(dateValue!=null){
                    rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
                  }
                }
                break;

              default:
View Full Code Here

        // this is a content to ingest
        InputStream is = null;
        long fileLength = 0;
        try {
          //properties ingestion
          RepositoryDocument rd = new RepositoryDocument();
          PropertiesUtils.ingestProperties(rd, properties);

          // binaries ingestion - in Alfresco we could have more than one binary for each node (custom content models)
          List<NamedValue> contentProperties = PropertiesUtils.getContentProperties(properties);
          for (NamedValue contentProperty : contentProperties) {
            //we are ingesting all the binaries defined as d:content property in the Alfresco content model
            Content binary = ContentReader.read(username, password, session, predicate, contentProperty.getName());
            fileLength = binary.getLength();
            is = ContentReader.getBinary(binary, username, password, session);
            rd.setBinary(is, fileLength);
          }

        } finally {
          try {
            if(is!=null){
View Full Code Here

        // this is a content to ingest
        InputStream is = null;
        long fileLength = 0;
        try {
          //properties ingestion
          RepositoryDocument rd = new RepositoryDocument();     
          List<NamedValue> contentProperties = PropertiesUtils.getContentProperties(properties);
          PropertiesUtils.ingestProperties(rd, properties, contentProperties);

          // binaries ingestion - in Alfresco we could have more than one binary for each node (custom content models)
          for (NamedValue contentProperty : contentProperties) {
            //we are ingesting all the binaries defined as d:content property in the Alfresco content model
            Content binary = ContentReader.read(username, password, session, predicate, contentProperty.getName());
            fileLength = binary.getLength();
            is = ContentReader.getBinary(binary, username, password, session);
            rd.setBinary(is, fileLength);
           
            //id is the node reference only if the node has an unique content stream
            //For a node with a single d:content property: id = node reference
            String id = PropertiesUtils.getNodeReference(properties);
           
View Full Code Here

              long startFetchTime = System.currentTimeMillis();
              String fileName = getFileCanonicalPath(file);
              if (fileName != null && !file.isHidden())
              {
                // Initialize repository document with common stuff, and find the URI
                RepositoryDocument rd = new RepositoryDocument();
                String uri = prepareForIndexing(rd,file,version);

                if (activities.checkURLIndexable(uri))
                {

                  // manipulate path to include the DFS alias, not the literal path
                  // String newPath = matchPrefix + fileName.substring(matchReplace.length());
                  String newPath = fileName;
                  if (checkNeedFileData(newPath, spec))
                  {
                    if (Logging.connectors.isDebugEnabled())
                      Logging.connectors.debug("JCIFS: Local file data needed for '"+documentIdentifier+"'");

                    // Create a temporary file, and use that for the check and then the ingest
                    File tempFile = File.createTempFile("_sdc_",null);
                    try
                    {
                      FileOutputStream os = new FileOutputStream(tempFile);
                      try
                      {

                        // Now, make a local copy so we can fingerprint
                        InputStream inputStream = getFileInputStream(file);
                        try
                        {
                          // Copy!
                          if (transferBuffer == null)
                            transferBuffer = new byte[65536];
                          while (true)
                          {
                            int amt = inputStream.read(transferBuffer,0,transferBuffer.length);
                            if (amt == -1)
                              break;
                            os.write(transferBuffer,0,amt);
                          }
                        }
                        finally
                        {
                          inputStream.close();
                        }
                      }
                      finally
                      {
                        os.close();
                      }

                      if (checkIngest(tempFile, newPath, spec, activities))
                      {
                        if (Logging.connectors.isDebugEnabled())
                          Logging.connectors.debug("JCIFS: Decided to ingest '"+documentIdentifier+"'");
                        // OK, do ingestion itself!
                        InputStream inputStream = new FileInputStream(tempFile);
                        try
                        {
                          rd.setBinary(inputStream, tempFile.length());
                         
                          activities.ingestDocument(documentIdentifier, version, uri, rd);
                        }
                        finally
                        {
                          inputStream.close();
                        }

                        // I put this record here deliberately for two reasons:
                        // (1) the other path includes ingestion time, and
                        // (2) if anything fails up to and during ingestion, I want THAT failure record to be written, not this one.
                        // So, really, ACTIVITY_ACCESS is a bit more than just fetch for JCIFS...
                        activities.recordActivity(new Long(startFetchTime),ACTIVITY_ACCESS,
                          new Long(tempFile.length()),documentIdentifier,"Success",null,null);

                      }
                      else
                      {
                        // We must actively remove the document here, because the getDocumentVersions()
                        // method has no way of signalling this, since it does not do the fingerprinting.
                        if (Logging.connectors.isDebugEnabled())
                          Logging.connectors.debug("JCIFS: Decided to remove '"+documentIdentifier+"'");
                        activities.deleteDocument(documentIdentifier, version);
                        // We should record the access here as well, since this is a non-exception way through the code path.
                        // (I noticed that this was not being recorded in the history while fixing 25477.)
                        activities.recordActivity(new Long(startFetchTime),ACTIVITY_ACCESS,
                          new Long(tempFile.length()),documentIdentifier,"Success",null,null);
                      }
                    }
                    finally
                    {
                      tempFile.delete();
                    }
                  }
                  else
                  {
                    if (Logging.connectors.isDebugEnabled())
                      Logging.connectors.debug("JCIFS: Local file data not needed for '"+documentIdentifier+"'");

                    // Presume that since the file was queued that it fulfilled the needed criteria.
                    // Go off and ingest the fast way.

                    // Ingest the document.
                    InputStream inputStream = getFileInputStream(file);
                    try
                    {
                      rd.setBinary(inputStream, fileLength(file));
                     
                      activities.ingestDocument(documentIdentifier, version, uri, rd);
                    }
                    finally
                    {
View Full Code Here

        long fileLength = dropboxObject.bytes;
        InputStream is = null;

        try {
          RepositoryDocument rd = new RepositoryDocument();

          //binary
          if (fileLength > 0) {
            is = getInputStream(nodeId);
            rd.setBinary(is, fileLength);
          }

          rd.addField("Modified", dropboxObject.modified);
          rd.addField("Size", dropboxObject.size);
          rd.addField("Path", dropboxObject.path);
          rd.addField("Root", dropboxObject.root);
          rd.addField("ClientMtime", dropboxObject.clientMtime);
          rd.addField("mimeType", dropboxObject.mimeType);
          rd.addField("rev", dropboxObject.rev);

          //ingestion
          String version = dropboxObject.rev;
          if (StringUtils.isEmpty(version)) {
            version = StringUtils.EMPTY;
View Full Code Here

        Document document = (Document) cmisObject;
        long fileLength = document.getContentStreamLength();
        InputStream is = null;
       
        try {
          RepositoryDocument rd = new RepositoryDocument();
          Date createdDate = document.getCreationDate().getTime();
          Date modifiedDate = document.getLastModificationDate().getTime();
         
          rd.setFileName(document.getContentStreamFileName());
          rd.setMimeType(document.getContentStreamMimeType());
          rd.setCreatedDate(createdDate);
          rd.setModifiedDate(modifiedDate);
         
          //binary
          if(fileLength>0 && document.getContentStream()!=null){
            is = document.getContentStream().getStream();
            rd.setBinary(is, fileLength);
          }

          //properties
          List<Property<?>> properties = document.getProperties();
          String id = StringUtils.EMPTY;
          for (Property<?> property : properties) {
            String propertyId = property.getId();
            if (propertyId.endsWith(Constants.PARAM_OBJECT_ID))
              id = (String) property.getValue();

            if (property.getValue() !=null
                || property.getValues() != null) {
              PropertyType propertyType = property.getType();

              switch (propertyType) {

              case STRING:
              case ID:
              case URI:
              case HTML:
                if(property.isMultiValued()){
                  List<String> htmlPropertyValues = (List<String>) property.getValues();
                  for (String htmlPropertyValue : htmlPropertyValues) {
                    rd.addField(propertyId, htmlPropertyValue);
                  }
                } else {
                  String stringValue = (String) property.getValue();
                  if(StringUtils.isNotEmpty(stringValue)){
                    rd.addField(propertyId, stringValue);
                  }
                }
                break;
    
              case BOOLEAN:
                if(property.isMultiValued()){
                  List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
                  for (Boolean booleanPropertyValue : booleanPropertyValues) {
                    rd.addField(propertyId, booleanPropertyValue.toString());
                  }
                } else {
                  Boolean booleanValue = (Boolean) property.getValue();
                  if(booleanValue!=null){
                    rd.addField(propertyId, booleanValue.toString());
                  }
                }
                break;

              case INTEGER:
                if(property.isMultiValued()){
                  List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
                  for (BigInteger integerPropertyValue : integerPropertyValues) {
                    rd.addField(propertyId, integerPropertyValue.toString());
                  }
                } else {
                  BigInteger integerValue = (BigInteger) property.getValue();
                  if(integerValue!=null){
                    rd.addField(propertyId, integerValue.toString());
                  }
                }
                break;

              case DECIMAL:
                if(property.isMultiValued()){
                  List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
                  for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
                    rd.addField(propertyId, decimalPropertyValue.toString());
                  }
                } else {
                  BigDecimal decimalValue = (BigDecimal) property.getValue();
                  if(decimalValue!=null){
                    rd.addField(propertyId, decimalValue.toString());
                  }
                }
                break;

              case DATETIME:
                if(property.isMultiValued()){
                  List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
                  for (GregorianCalendar datePropertyValue : datePropertyValues) {
                    rd.addField(propertyId,
                        ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
                  }
                } else {
                  GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
                  if(dateValue!=null){
                    rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
                  }
                }
                break;

              default:
View Full Code Here

        }

        // It is a file to be indexed.
       
        // Prepare the metadata part of RepositoryDocument
        RepositoryDocument data = new RepositoryDocument();

        data.setFileName(fileStatus.getPath().getName());
        data.setMimeType(mapExtensionToMimeType(fileStatus.getPath().getName()));
        data.setModifiedDate(new Date(fileStatus.getModificationTime()));

        String uri;
        if (convertPath != null) {
          uri = convertToWGETURI(convertPath);
        } else {
          uri = fileStatus.getPath().toUri().toString();
        }
        data.addField("uri",uri);

        // We will record document fetch as an activity
        long startTime = System.currentTimeMillis();
        String errorCode = "FAILED";
        String errorDesc = StringUtils.EMPTY;
        long fileSize = 0;

        try {
          BackgroundStreamThread t = new BackgroundStreamThread(getSession(),new Path(documentIdentifier));
          try {
            t.start();
            boolean wasInterrupted = false;
            try {
              InputStream is = t.getSafeInputStream();
              try {
                data.setBinary(is, fileSize);
                activities.ingestDocumentWithException(documentIdentifier,version,uri,data);
              } finally {
                is.close();
              }
            } catch (java.net.SocketTimeoutException e) {
View Full Code Here

            String _id = documentIdentifiers[i];
            String version = versions[i];
            getSession();
            GridFS gfs = new GridFS(session, bucket);

            RepositoryDocument rd = new RepositoryDocument();
            if (Logging.connectors.isDebugEnabled()) {
                Logging.connectors.debug("GridFS: Processing document _id = " + _id);
            }

            GridFSDBFile document = gfs.findOne(new ObjectId(_id));

            if (document == null) {
                activities.deleteDocument(_id);
                i++;
                continue;
            }

            DBObject metadata = document.getMetaData();
            if (metadata == null) {
                Logging.connectors.warn("GridFS: Document " + _id + " has a null metadata - skipping.");
                i++;
                continue;
            }

            String urlValue = document.getMetaData().get(this.url) == null
                    ? StringUtils.EMPTY
                    : document.getMetaData().get(this.url).toString();
            if (!StringUtils.isEmpty(urlValue)) {
                if (!scanOnly[i]) {
                    boolean validURL;
                    try {
                        new java.net.URI(urlValue);
                        validURL = true;
                    } catch (java.net.URISyntaxException e) {
                        validURL = false;
                    }
                    if (validURL) {
                        long fileLenght = document.getLength();
                        InputStream is = document.getInputStream();
                        try {
                            Date indexingDate = new Date();
                            rd.setBinary(is, fileLenght);
                            rd.setCreatedDate(document.getUploadDate());
                            rd.setFileName(document.getFilename());
                            rd.setIndexingDate(indexingDate);
                            rd.setMimeType(document.getContentType());
                            String[] aclsArray = null;
                            String[] denyAclsArray = null;
                            if (acl != null) {
                                try {
                                    Object aclObject = document.getMetaData().get(acl);
                                    if (aclObject != null) {
                                        List<String> acls = (List<String>) aclObject;
                                        aclsArray = (String[]) acls.toArray();
                                    }
                                } catch (ClassCastException e) {
                                    // This is bad because security will fail
                                    Logging.connectors.warn("GridFS: Document " + _id + " metadata ACL field doesn't contain List<String> type.");
                                    throw new ManifoldCFException("Security decoding error: "+e.getMessage(),e);
                                }
                            }
                            if (denyAcl != null) {
                                try {
                                    Object denyAclObject = document.getMetaData().get(denyAcl);
                                    if (denyAclObject != null) {
                                        List<String> denyAcls = (List<String>) denyAclObject;
                                        denyAcls.add(GLOBAL_DENY_TOKEN);
                                        denyAclsArray = (String[]) denyAcls.toArray();
                                    }
                                } catch (ClassCastException e) {
                                    // This is bad because security will fail
                                    Logging.connectors.warn("GridFS: Document " + _id + " metadata DenyACL field doesn't contain List<String> type.");
                                    throw new ManifoldCFException("Security decoding error: "+e.getMessage(),e);
                                }
                            }
                            rd.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT,aclsArray,denyAclsArray);
                            try {
                                activities.ingestDocumentWithException(_id, version, urlValue, rd);
                            } catch (IOException e) {
                                handleIOException(e);
                            }
View Full Code Here

TOP

Related Classes of org.apache.manifoldcf.agents.interfaces.RepositoryDocument

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.