Package com.alibaba.otter.node.etl.extract.extractor

Source Code of com.alibaba.otter.node.etl.extract.extractor.FreedomExtractor

/*
* Copyright (C) 2010-2101 Alibaba Group Holding Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.alibaba.otter.node.etl.extract.extractor;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.lang.StringUtils;
import org.apache.ddlutils.model.Column;
import org.apache.ddlutils.model.Table;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.Assert;
import org.springframework.util.CollectionUtils;

import com.alibaba.otter.node.etl.common.db.dialect.DbDialect;
import com.alibaba.otter.node.etl.extract.exceptions.ExtractException;
import com.alibaba.otter.shared.common.model.config.ConfigException;
import com.alibaba.otter.shared.common.model.config.ConfigHelper;
import com.alibaba.otter.shared.common.model.config.channel.ChannelParameter.SyncConsistency;
import com.alibaba.otter.shared.common.model.config.channel.ChannelParameter.SyncMode;
import com.alibaba.otter.shared.common.model.config.data.DataMedia;
import com.alibaba.otter.shared.common.model.config.data.db.DbMediaSource;
import com.alibaba.otter.shared.common.model.config.pipeline.Pipeline;
import com.alibaba.otter.shared.etl.model.DbBatch;
import com.alibaba.otter.shared.etl.model.EventColumn;
import com.alibaba.otter.shared.etl.model.EventData;
import com.alibaba.otter.shared.etl.model.EventType;

/**
* 自由之门,允许手工触发数据订正,解析这些记录
*
* <pre>
* buffer表结构:
*  id , table_id ,  type , pk_data , gmt_create , gmt_modified
* pk_data针对多主键时,使用char(1)进行分隔
* </pre>
*
* @author jianghang 2012-4-25 下午04:41:33
* @version 4.0.2
*/
public class FreedomExtractor extends AbstractExtractor<DbBatch> {

    private static final Logger logger    = LoggerFactory.getLogger(FreedomExtractor.class);
    private static final char   PK_SPLIT  = (char) 1;
    // private static final String ID = "id";
    private static final String TABLE_ID  = "table_id";
    private static final String FULL_NAME = "full_name";
    private static final String TYPE      = "type";
    private static final String PK_DATA   = "pk_data";

    public void extract(DbBatch dbBatch) throws ExtractException {
        Assert.notNull(dbBatch);

        // 读取配置
        Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());

        boolean skipFreedom = pipeline.getParameters().getSkipFreedom();
        String bufferSchema = pipeline.getParameters().getSystemSchema();
        String bufferTable = pipeline.getParameters().getSystemBufferTable();

        List<EventData> eventDatas = dbBatch.getRowBatch().getDatas();
        Set<EventData> removeDatas = new HashSet<EventData>();// 使用set,提升remove时的查找速度
        for (EventData eventData : eventDatas) {
            if (StringUtils.equalsIgnoreCase(bufferSchema, eventData.getSchemaName())
                && StringUtils.equalsIgnoreCase(bufferTable, eventData.getTableName())) {
                if (eventData.getEventType().isDdl()) {
                    continue;
                }

                if (skipFreedom) {// 判断是否需要忽略
                    removeDatas.add(eventData);
                    continue;
                }

                // 只处理insert / update记录
                if (eventData.getEventType().isInsert() || eventData.getEventType().isUpdate()) {
                    // 重新改写一下EventData的数据,根据系统表的定义
                    EventColumn tableIdColumn = getMatchColumn(eventData.getColumns(), TABLE_ID);
                    // 获取到对应tableId的media信息
                    try {
                        DataMedia dataMedia = null;
                        Long tableId = Long.valueOf(tableIdColumn.getColumnValue());
                        eventData.setTableId(tableId);
                        if (tableId <= 0) { // 直接按照full_name进行查找
                            // 尝试直接根据schema+table name进行查找
                            EventColumn fullNameColumn = getMatchColumn(eventData.getColumns(), FULL_NAME);
                            if (fullNameColumn != null) {
                                String[] names = StringUtils.split(fullNameColumn.getColumnValue(), ".");
                                if (names.length >= 2) {
                                    dataMedia = ConfigHelper.findSourceDataMedia(pipeline, names[0], names[1]);
                                    eventData.setTableId(dataMedia.getId());
                                } else {
                                    throw new ConfigException("no such DataMedia " + names);
                                }
                            }
                        } else {
                            // 如果指定了tableId,需要按照tableId进行严格查找,如果没找到,那说明不需要进行同步
                            dataMedia = ConfigHelper.findDataMedia(pipeline,
                                Long.valueOf(tableIdColumn.getColumnValue()));
                        }

                        DbDialect dbDialect = dbDialectFactory.getDbDialect(pipeline.getId(),
                            (DbMediaSource) dataMedia.getSource());
                        // 考虑offer[1-128]的配置模式
                        if (!dataMedia.getNameMode().getMode().isSingle()
                            || !dataMedia.getNamespaceMode().getMode().isSingle()) {
                            boolean hasError = true;
                            EventColumn fullNameColumn = getMatchColumn(eventData.getColumns(), FULL_NAME);
                            if (fullNameColumn != null) {
                                String[] names = StringUtils.split(fullNameColumn.getColumnValue(), ".");
                                if (names.length >= 2) {
                                    eventData.setSchemaName(names[0]);
                                    eventData.setTableName(names[1]);
                                    hasError = false;
                                }
                            }

                            if (hasError) {
                                // 出现异常,需要记录一下
                                logger.warn("dataMedia mode:{} , fullname:{} ",
                                    dataMedia.getMode(),
                                    fullNameColumn == null ? null : fullNameColumn.getColumnValue());
                                removeDatas.add(eventData);
                                // 跳过这条记录
                                continue;
                            }
                        } else {
                            eventData.setSchemaName(dataMedia.getNamespace());
                            eventData.setTableName(dataMedia.getName());
                        }

                        // 更新业务类型
                        EventColumn typeColumn = getMatchColumn(eventData.getColumns(), TYPE);
                        EventType eventType = EventType.valuesOf(typeColumn.getColumnValue());
                        eventData.setEventType(eventType);
                        if (eventType.isUpdate()) {// 如果是update强制修改为insert,这样可以在目标端执行merge
                                                   // sql
                            eventData.setEventType(EventType.INSERT);
                        } else if (eventType.isDdl()) {
                            dbDialect.reloadTable(eventData.getSchemaName(), eventData.getTableName());
                            removeDatas.add(eventData);// 删除当前记录
                            continue;
                        }
                        // 重新构建新的业务主键字段
                        EventColumn pkDataColumn = getMatchColumn(eventData.getColumns(), PK_DATA);
                        String pkData = pkDataColumn.getColumnValue();
                        String[] pks = StringUtils.split(pkData, PK_SPLIT);

                        Table table = dbDialect.findTable(eventData.getSchemaName(), eventData.getTableName());
                        List<EventColumn> newColumns = new ArrayList<EventColumn>();
                        Column[] primaryKeyColumns = table.getPrimaryKeyColumns();
                        if (primaryKeyColumns.length > pks.length) {
                            throw new ExtractException("data pk column size not match , data:" + eventData.toString());
                        }
                        // 构建字段
                        Column[] allColumns = table.getColumns();
                        int pkIndex = 0;
                        for (int i = 0; i < allColumns.length; i++) {
                            Column column = allColumns[i];
                            if (column.isPrimaryKey()) {
                                EventColumn newColumn = new EventColumn();
                                newColumn.setIndex(i); // 设置下标
                                newColumn.setColumnName(column.getName());
                                newColumn.setColumnType(column.getTypeCode());
                                newColumn.setColumnValue(pks[pkIndex]);
                                newColumn.setKey(true);
                                newColumn.setNull(pks[pkIndex] == null);
                                newColumn.setUpdate(true);
                                // 添加到记录
                                newColumns.add(newColumn);
                                pkIndex++;
                            }
                        }
                        // 设置数据
                        eventData.setKeys(newColumns);
                        eventData.setOldKeys(new ArrayList<EventColumn>());
                        eventData.setColumns(new ArrayList<EventColumn>());
                        // 设置为行记录+反查
                        eventData.setSyncMode(SyncMode.ROW);
                        eventData.setSyncConsistency(SyncConsistency.MEDIA);
                        eventData.setRemedy(true);
                        eventData.setSize(1024);// 默认为1kb,如果还是按照binlog大小计算的话,可能会采用rpc传输,导致内存不够用
                    } catch (ConfigException e) {
                        // 忽略掉,因为系统表会被共享,所以这条记录会被不是该同步通道给获取到
                        logger.info("find DataMedia error " + eventData.toString(), e);
                        removeDatas.add(eventData);
                        continue;
                    } catch (Throwable e) {
                        // 出现异常时忽略掉
                        logger.warn("process freedom data error " + eventData.toString(), e);
                        removeDatas.add(eventData);
                        continue;
                    }
                } else {
                    removeDatas.add(eventData);// 删除该记录
                }
            }
        }

        if (!CollectionUtils.isEmpty(removeDatas)) {
            eventDatas.removeAll(removeDatas);
        }
    }

    private EventColumn getMatchColumn(List<EventColumn> columns, String columnName) {
        for (EventColumn column : columns) {
            if (StringUtils.equalsIgnoreCase(column.getColumnName(), columnName)) {
                return column;
            }
        }

        return null;
    }

}
TOP

Related Classes of com.alibaba.otter.node.etl.extract.extractor.FreedomExtractor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.